/* //++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
/* // ENBSeries effect file
/* // visit http://enbdev.com for updates
/* // Copyright  2007-2011 Boris Vorontsov
/* //++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
/* // MasterEffect 1.5 by Marty McFly
/* // Copyright  2009-2014 Marty McFly
/* //++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 


#include "Common.cfi"
#include "ShadeLib.cfi"
#include "PostEffectsLib.cfi"

// Shader global descriptions
// Global script annotation for this effect file. The annotation string sets
// the NoPreview and LocalConstants flags and declares the draw type as Custom
// and the shader type as PostProcess.
float Script : STANDARDSGLOBAL
<
  string Script =
           "NoPreview;"
           "LocalConstants;"
           "ShaderDrawType = Custom;"
           "ShaderType = PostProcess;"
>; 

// Sampler for textures/defaults/glitter_color.dds.
// Linear min/mag/mip filtering, wrap addressing on both U and V.
sampler2D rainbowSampler = sampler_state
{
  Texture = textures/defaults/glitter_color.dds;
  MinFilter = LINEAR;  
  MagFilter = LINEAR;
  MipFilter = LINEAR; 
  AddressU = Wrap;
  AddressV = Wrap;
};

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Texture To Texture technique /////////////////////////////////////////////////////////////////////////

/// Specific data ////////////////////////

/// Constants ////////////////////////////

// UV offsets for the four extra taps of the TextureToTexture techniques.
// TexToTexVS adds texToTexParams0.xy, texToTexParams0.zw, texToTexParams1.xy
// and texToTexParams1.zw to the base UV (one float2 offset per tap).
float4 texToTexParams0;
float4 texToTexParams1;

////////////////// samplers /////////////////////

///////////////// vertex shader //////////////////

// Vertex-to-pixel interpolants for the TextureToTexture techniques.
// TexToTexVS fills only the .xy of each baseTC member; .zw stay zero.
struct vtxOutTexToTex
{
  float4 HPosition  : POSITION;
  // Unmodified base UV (centre tap).
  float4 baseTC0 : TEXCOORDN;    
  // Base UV + texToTexParams0.xy
  float4 baseTC1 : TEXCOORDN;    
  // Base UV + texToTexParams0.zw
  float4 baseTC2 : TEXCOORDN;    
  // Base UV + texToTexParams1.xy
  float4 baseTC3 : TEXCOORDN;    
  // Base UV + texToTexParams1.zw
  float4 baseTC4 : TEXCOORDN;    
};

// Vertex shader shared by all TextureToTexture techniques.
// Transforms the vertex by vpMatrix and emits the base UV together with four
// copies displaced by the offsets stored in texToTexParams0/1.
vtxOutTexToTex TexToTexVS(vtxIn IN)
{
  vtxOutTexToTex OUT = (vtxOutTexToTex)0;

  OUT.HPosition = mul(vpMatrix, IN.Position);

  // Centre tap.
  OUT.baseTC0.xy = IN.baseTC.xy;

  // Four surrounding taps, one per float2 offset.
  OUT.baseTC1.xy = IN.baseTC.xy + texToTexParams0.xy;
  OUT.baseTC2.xy = IN.baseTC.xy + texToTexParams0.zw;
  OUT.baseTC3.xy = IN.baseTC.xy + texToTexParams1.xy;
  OUT.baseTC4.xy = IN.baseTC.xy + texToTexParams1.zw;

  return OUT;
}

///////////////// pixel shader //////////////////
// Plain copy: outputs the _tex0 texel at the unmodified base UV.
pixout TexToTexPS(vtxOutTexToTex IN)
{
  pixout OUT;

  const float2 vCentreUV = IN.baseTC0.xy;
  OUT.Color = tex2D(_tex0, vCentreUV);

  return OUT;
}

// Rescaling copy: averages the centre texel with the four offset taps
// prepared by TexToTexVS (each of the five taps weighted 0.2).
pixout TexToTexSampledPS(vtxOutTexToTex IN)
{
  pixout OUT;

  half4 cCentre = tex2D(_tex0, IN.baseTC0.xy);
  half4 cTapA   = tex2D(_tex0, IN.baseTC1.xy);
  half4 cTapB   = tex2D(_tex0, IN.baseTC2.xy);
  half4 cTapC   = tex2D(_tex0, IN.baseTC3.xy);
  half4 cTapD   = tex2D(_tex0, IN.baseTC4.xy);

  // Equal-weight average of the five samples.
  OUT.Color = (cCentre + cTapA + cTapB + cTapC + cTapD) * 0.2f;

  return OUT;
}


// Resampling variant used for the SSAO z-target.
// Instead of averaging, every channel of the centre sample is max'ed against
// the largest red-channel value of the four offset taps.
pixout TexToTexSampledAOPS(vtxOutTexToTex IN)
{
  pixout OUT;

  half4 cCentre = tex2D(_tex0, IN.baseTC0.xy);
  half4 cTapA   = tex2D(_tex0, IN.baseTC1.xy);
  half4 cTapB   = tex2D(_tex0, IN.baseTC2.xy);
  half4 cTapC   = tex2D(_tex0, IN.baseTC3.xy);
  half4 cTapD   = tex2D(_tex0, IN.baseTC4.xy);

  // Largest .r among the four neighbours (paired A/C and B/D).
  half fPairAC = max(cTapA.r, cTapC.r);
  half fPairBD = max(cTapB.r, cTapD.r);
  half fNeighbourMax = max(fPairAC, fPairBD);

  // Taking the max (rather than the mean) prevents artifacts.
  OUT.Color = max(cCentre, fNeighbourMax);

  return OUT;
}

////////////////// technique /////////////////////

// Five-tap max resample (TexToTexVS + TexToTexSampledAOPS), culling disabled.
technique TextureToTextureResampledAO
{
  pass p0
  {
    VertexShader = CompileVS TexToTexVS();            
    PixelShader = CompilePS TexToTexSampledAOPS();
    CullMode = None;        
  }
}

// Single-tap copy (TexToTexVS + TexToTexPS), culling disabled.
technique TextureToTexture
{
  pass p0
  {
    VertexShader = CompileVS TexToTexVS();            
    PixelShader = CompilePS TexToTexPS();
    CullMode = None;        
  }
}

// Five-tap averaged resample (TexToTexVS + TexToTexSampledPS), culling disabled.
technique TextureToTextureResampled
{
  pass p0
  {
    VertexShader = CompileVS TexToTexVS();            
    PixelShader = CompilePS TexToTexSampledPS();
    CullMode = None;        
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Post-Process Anti Aliasing technique ///////////////////////////////////////////////////////////
///	FXAA technique and original code is copyright (C) NVIDIA by Timothy Lottes

/// Specific data ////////////////////////
// Tuning constants for the two FXAA pixel shaders.
// fxaaParams0, read by FXAA_PS:
//   x = absolute luminance-range floor for the early-out,
//   y = range threshold relative to the brightest sample,
//   z = scale applied to the sub-pixel blend term,
//   w = not read by FXAA_PS.
// fxaaParams1, read by FXAAFast_PS:
//   x = multiplier on the smaller direction component,
//   y = absolute luminance-range floor for the early-out,
//   z = range threshold relative to the brightest corner sample,
//   w = not read by FXAAFast_PS.
static const float4 fxaaParams0 = {0.08f, 0.16f, 0.75f, 0.25f};
static const float4 fxaaParams1 = {4.f, 0.05f, 0.125f, 0.0f}; 

/// Constants ////////////////////////////

/// Samplers ////////////////////////////

///////////////// vertex shader //////////////////

// Vertex-to-pixel interpolants for the FXAA techniques.
struct vtxOutFXAA
{
  float4 HPosition  : POSITION;
  // .xy = unmodified base UV (FXAA_VS leaves .zw at zero).
  float4 baseTC     : TEXCOORD0;
  // .xy = base UV - 0.5 * g_VS_ScreenSize.zw, .zw = base UV + 0.5 * g_VS_ScreenSize.zw.
  float4 baseTC1	: TEXCOORD1;
};

// Vertex shader for both FXAA techniques.
// Emits the base UV plus a pair of diagonally displaced UVs:
// baseTC1.xy is shifted by -0.5 * g_VS_ScreenSize.zw and baseTC1.zw by +0.5 * g_VS_ScreenSize.zw.
vtxOutFXAA FXAA_VS(vtxIn IN)
{
  vtxOutFXAA OUT = (vtxOutFXAA)0;

  OUT.HPosition = mul(vpMatrix, IN.Position);
  OUT.baseTC.xy = IN.baseTC.xy;

  // Sub-pixel offsets: negative corner in .xy, positive corner in .zw.
  OUT.baseTC1.xy = IN.baseTC.xy - 0.5 * g_VS_ScreenSize.zw;
  OUT.baseTC1.zw = IN.baseTC.xy + 0.5 * g_VS_ScreenSize.zw;

  return OUT;
}

///////////////// pixel shader //////////////////

// Full FXAA pixel shader with edge-end search.
//
// Luminance is read from the .w channel of _tex0. The shader
//   1. samples the centre and its 4 direct neighbours and returns the centre
//      colour unchanged when the local luminance range is below the threshold
//      built from fxaaParams0.x / fxaaParams0.y,
//   2. samples the 4 diagonal neighbours and decides whether the edge is
//      treated as horizontal or vertical,
//   3. walks along the edge in both directions (one initial step plus up to
//      11 further steps of growing size) until the luminance leaves the edge
//      band,
//   4. shifts the sampling UV perpendicular to the edge by the larger of the
//      span-based offset and the sub-pixel term (scaled by fxaaParams0.z) and
//      returns the texel fetched there.
//
// All fetches use tex2Dlod at mip 0.
pixout FXAA_PS(vtxOutFXAA IN)
{
  pixout OUT = (pixout)0;

  // Step between neighbouring taps.
  // NOTE(review): presumably PS_ScreenSize.zw holds half a texel so that the
  // doubling yields one texel - confirm against the engine constant.
  float2 vPixelSizes = PS_ScreenSize.zw * 2.0;

  // Initial sample. Used on early-out.
  float4 cSampleCenter = tex2Dlod(_tex0, float4(IN.baseTC.xy,0,0));
  OUT.Color = cSampleCenter;

  // Luminance of the centre and its four direct neighbours.
  float fLumCenter = cSampleCenter.w;
  float fLumBottom = tex2Dlod(_tex0, float4(IN.baseTC.xy + float2( 0, 1) * vPixelSizes.xy,0,0)).w;
  float fLumRight  = tex2Dlod(_tex0, float4(IN.baseTC.xy + float2( 1, 0) * vPixelSizes.xy,0,0)).w;
  float fLumTop    = tex2Dlod(_tex0, float4(IN.baseTC.xy + float2( 0,-1) * vPixelSizes.xy,0,0)).w;
  float fLumLeft   = tex2Dlod(_tex0, float4(IN.baseTC.xy + float2(-1, 0) * vPixelSizes.xy,0,0)).w;

  float fMaxRange = max(max(fLumTop, fLumLeft), max(fLumRight, max(fLumBottom, fLumCenter)));
  float fMinRange = min(min(fLumTop, fLumLeft), min(fLumRight, min(fLumBottom, fLumCenter)));
  float fRange = fMaxRange - fMinRange;

  // Early out: not enough local contrast to be an edge.
  if(fRange < max(fxaaParams0.x, fMaxRange * fxaaParams0.y))
    return OUT;

  // Luminance of the four diagonal neighbours.
  float fLumTopLeft     = tex2Dlod(_tex0, float4(IN.baseTC.xy + float2(-1,-1) * vPixelSizes.xy,0,0)).w;
  float fLumBottomRight = tex2Dlod(_tex0, float4(IN.baseTC.xy + float2( 1, 1) * vPixelSizes.xy,0,0)).w;
  float fLumTopRight    = tex2Dlod(_tex0, float4(IN.baseTC.xy + float2( 1,-1) * vPixelSizes.xy,0,0)).w;
  float fLumBottomLeft  = tex2Dlod(_tex0, float4(IN.baseTC.xy + float2(-1, 1) * vPixelSizes.xy,0,0)).w;

  float fLumTopBottom = fLumTop  + fLumBottom;
  float fLumLeftRight = fLumLeft + fLumRight;
  float fLumSubPixel = fLumTopBottom + fLumLeftRight;

  // Second-difference terms; fEdgeH sums the ones taken across rows,
  // fEdgeV the ones taken across columns.
  float fEdgeH1 = (-2.0 * fLumCenter) + fLumTopBottom;
  float fEdgeV1 = (-2.0 * fLumCenter) + fLumLeftRight;

  float fLumTopBottomRight = fLumTopRight + fLumBottomRight;
  float fLumTopLeftRight = fLumTopLeft + fLumTopRight;
  float fEdgeH2 = (-2.0 * fLumRight) + fLumTopBottomRight;
  float fEdgeV2 = (-2.0 * fLumTop) + fLumTopLeftRight;

  float fLumTopBottomLeft = fLumTopLeft + fLumBottomLeft;
  float fLumBottomLeftRight = fLumBottomLeft + fLumBottomRight;
  float fEdgeH4 = (abs(fEdgeH1) * 2.0) + abs(fEdgeH2);
  float fEdgeV4 = (abs(fEdgeV1) * 2.0) + abs(fEdgeV2);
  float fEdgeH3 = (-2.0 * fLumLeft) + fLumTopBottomLeft;
  float fEdgeV3 = (-2.0 * fLumBottom) + fLumBottomLeftRight;
  float fEdgeH = abs(fEdgeH3) + fEdgeH4;
  float fEdgeV = abs(fEdgeV3) + fEdgeV4;

  float fBlendSubPixel = fLumTopBottomLeft + fLumTopBottomRight;
  float fLengthSign = vPixelSizes.x;
  bool bHorizontalSpan = fEdgeH >= fEdgeV;
  float fSubPixelA = fLumSubPixel * 2.0 + fBlendSubPixel;

  // For a vertical span reuse the Top/Bottom variables for Left/Right so the
  // remaining code can be written once.
  if(!bHorizontalSpan) fLumTop = fLumLeft;
  if(!bHorizontalSpan) fLumBottom = fLumRight;
  if(bHorizontalSpan) fLengthSign = vPixelSizes.y;
  // Weighted 3x3 neighbourhood average (weights sum to 12) minus the centre.
  float fSubPixelB = (fSubPixelA * (1.0/12.0)) - fLumCenter;

  // Pick the side of the centre with the steeper luminance gradient.
  float fGradientN = fLumTop - fLumCenter;
  float fGradientS = fLumBottom - fLumCenter;
  float fLumTopCenter = fLumTop + fLumCenter;
  float fLumBottomCenter = fLumBottom + fLumCenter;
  bool fPairN = abs(fGradientN) >= abs(fGradientS);
  float fGradient = max(abs(fGradientN), abs(fGradientS));
  if(fPairN) fLengthSign = -fLengthSign;
  float fSubPixelC = saturate(abs(fSubPixelB) * (1.0 / fRange));

  // Start position: half a step towards the chosen side, on the edge itself.
  float2 vPositionB;
  vPositionB.x = IN.baseTC.x;
  vPositionB.y = IN.baseTC.y;
  // Search step: along x for a horizontal span, along y otherwise.
  float2 vOffsetNP;
  vOffsetNP.x = (!bHorizontalSpan) ? 0.0 : vPixelSizes.x;
  vOffsetNP.y = ( bHorizontalSpan) ? 0.0 : vPixelSizes.y;
  if(!bHorizontalSpan) vPositionB.x += fLengthSign * 0.5;
  if( bHorizontalSpan) vPositionB.y += fLengthSign * 0.5;

  float2 vPositionN;
  vPositionN.x = vPositionB.x - vOffsetNP.x;
  vPositionN.y = vPositionB.y - vOffsetNP.y;

  float2 vPositionP;
  vPositionP.x = vPositionB.x + vOffsetNP.x;
  vPositionP.y = vPositionB.y + vOffsetNP.y;

  float fSubPixelD = ((-2.0)*fSubPixelC) + 3.0;
  float fLumEndN = tex2Dlod(_tex0, float4(vPositionN,0,0)).w;
  float fLumEndP = tex2Dlod(_tex0, float4(vPositionP,0,0)).w;

  float fSubPixelE = (fSubPixelC * fSubPixelC);

  // From here on fLumTopCenter * 0.5 is the mean luminance across the edge.
  if(!fPairN) fLumTopCenter = fLumBottomCenter;
  float fGradientScaled = fGradient * 1.0/4.0;
  // fSubPixelF = C*C*(3 - 2*C), i.e. a smoothstep-shaped curve of fSubPixelC.
  float fSubPixelF = fSubPixelD * fSubPixelE;
  bool bLumZero = (fLumCenter - fLumTopCenter * 0.5) < 0.0;

  // First search step in both directions.
  fLumEndN -= fLumTopCenter * 0.5;
  fLumEndP -= fLumTopCenter * 0.5;
  bool bDoneN = abs(fLumEndN) >= fGradientScaled;
  bool bDoneP = abs(fLumEndP) >= fGradientScaled;
  if(!bDoneN) vPositionN.x -= vOffsetNP.x;
  if(!bDoneN) vPositionN.y -= vOffsetNP.y;
  if(!bDoneP) vPositionP.x += vOffsetNP.x;
  if(!bDoneP) vPositionP.y += vOffsetNP.y;

  // Per-iteration step multipliers for the remaining search.
  static const half fSearchScale[11] = {1.0, 1.0, 1.0, 1.0, 1.5, 2.0, 2.0, 2.0, 2.0, 4.0, 8.0};

  #if D3D10
  [unroll]
  #endif
  // Search edges. Each direction stops advancing once its luminance delta
  // reaches fGradientScaled; the loop itself always runs all 11 iterations.
  for(int i = 0; i < 11; i++)
  {
    if(!bDoneN) fLumEndN = tex2Dlod(_tex0, float4(vPositionN.xy,0,0)).w;
    if(!bDoneP) fLumEndP = tex2Dlod(_tex0, float4(vPositionP.xy,0,0)).w;
    if(!bDoneN) fLumEndN = fLumEndN - fLumTopCenter * 0.5;
    if(!bDoneP) fLumEndP = fLumEndP - fLumTopCenter * 0.5;
    bDoneN = abs(fLumEndN) >= fGradientScaled;
    bDoneP = abs(fLumEndP) >= fGradientScaled;
    if(!bDoneN) vPositionN.x -= vOffsetNP.x * fSearchScale[i];
    if(!bDoneN) vPositionN.y -= vOffsetNP.y * fSearchScale[i];
    if(!bDoneP) vPositionP.x += vOffsetNP.x * fSearchScale[i];
    if(!bDoneP) vPositionP.y += vOffsetNP.y * fSearchScale[i];
  }

  // Distance from the pixel to each end of the span.
  float fDestN = IN.baseTC.x - vPositionN.x;
  float fDestP = vPositionP.x - IN.baseTC.x;

  if(!bHorizontalSpan) fDestN = IN.baseTC.y - vPositionN.y;
  if(!bHorizontalSpan) fDestP = vPositionP.y - IN.baseTC.y;

  float fSpanLength = (fDestP + fDestN);
  bool bGoodSpanN = (fLumEndN < 0.0) != bLumZero;
  bool bGoodSpanP = (fLumEndP < 0.0) != bLumZero;

  // Use the nearer span end to derive the blend offset.
  bool bDirectionN = fDestN < fDestP;
  float fDest = min(fDestN, fDestP);
  bool bGoodSpan = bDirectionN ? bGoodSpanN : bGoodSpanP;
  float fSubPixelG = fSubPixelF * fSubPixelF;
  float fPixelOffset = (fDest * (-(1.0/fSpanLength))) + 0.5;
  float fSubPixelH = fSubPixelG * fxaaParams0.z;

  float fPixelOffsetGood = bGoodSpan ? fPixelOffset : 0.0;
  float fPixelOffsetSubpix = max(fPixelOffsetGood, fSubPixelH);
  // Shift the lookup perpendicular to the edge and resample.
  if(!bHorizontalSpan) IN.baseTC.x += fPixelOffsetSubpix * fLengthSign;
  if( bHorizontalSpan) IN.baseTC.y += fPixelOffsetSubpix * fLengthSign;

  OUT.Color = tex2Dlod(_tex0, float4(IN.baseTC.xy,0,0));

  return OUT;
}

///////////////// pixel shader //////////////////
// Cheap FXAA variant: no edge search, just a directional blur.
//
// Luminance is read from the .w channel of _tex0. The shader
//   1. samples the centre and the four diagonally offset UVs prepared by
//      FXAA_VS (baseTC1) and returns the centre colour unchanged when the
//      luminance range is below the threshold built from fxaaParams1.y/.z,
//   2. derives a 2D direction from the corner luminances,
//   3. averages two taps at +/- that direction (narrow pair) and two taps at
//      a clamped, rescaled direction with a 4x larger step (wide pair),
//   4. outputs the four-tap average unless its luminance falls outside the
//      corner luminance range, in which case the narrow pair average is used.
//
// Every fetch uses tex2Dlod at mip 0. The last tap previously used tex2D with
// a float4 coordinate; it sits after a data-dependent early return, so it now
// uses the same explicit-LOD fetch as its siblings.
pixout FXAAFast_PS(vtxOutFXAA IN)
{
  pixout OUT = (pixout)0;

  // .xy = step for the wide pair (4x PS_ScreenSize.zw), .zw = step for the narrow pair.
  const half4 vPixelSizes = PS_ScreenSize.zwzw * half4(4.0, 4.0, 1.0, 1.0);

  // Initial sample. Used on early-out.
  float4 cSampleCenter = tex2Dlod(_tex0, half4(IN.baseTC.xy,0,0));
  OUT.Color = cSampleCenter;

  // Accumulate the direction in vDir.x / vDir.z from the four corner luminances.
  half4 vDir;
  vDir.y = 0.0;
  half4 fLumTopRight = tex2Dlod(_tex0, half4(IN.baseTC1.zy,0,0));
  // Small bias on one corner so the direction is not exactly zero on flat input.
  fLumTopRight.w += half(1.0 / 384.0);
  vDir.x = -fLumTopRight.w;
  vDir.z = -fLumTopRight.w;

  half4 fLumBottomLeft = tex2Dlod(_tex0, half4(IN.baseTC1.xw,0,0));
  vDir.x += fLumBottomLeft.w;
  vDir.z += fLumBottomLeft.w;

  half4 fLumTopLeft = tex2Dlod(_tex0, half4(IN.baseTC1.xy,0,0));
  vDir.x -= fLumTopLeft.w;
  vDir.z += fLumTopLeft.w;

  half4 fLumBottomRight = tex2Dlod(_tex0, half4(IN.baseTC1.zw,0,0));
  vDir.x += fLumBottomRight.w;
  vDir.z -= fLumBottomRight.w;

  half fLumMin = min(min(fLumTopLeft.w, fLumBottomLeft.w), min(fLumTopRight.w, fLumBottomRight.w));
  half fLumMax = max(max(fLumTopLeft.w, fLumBottomLeft.w), max(fLumTopRight.w, fLumBottomRight.w));

  // Early out: not enough local contrast.
  if((max(fLumMax, cSampleCenter.w) - min(fLumMin, cSampleCenter.w)) < max(fxaaParams1.y, fLumMax * fxaaParams1.z))
    return OUT;

  // Unit direction (stored in .xy); .zw carries the centre UV.
  half4 vDir1;
  vDir1.xy = normalize(vDir.xyz).xz;
  half fDirAbsMinTimesC = min(abs(vDir1.x), abs(vDir1.y)) * fxaaParams1.x;

  // Rescaled direction for the wide pair, clamped to [-2, 2] per component.
  half4 vDir2;
  vDir2.xy = clamp(vDir1.xy / fDirAbsMinTimesC, -2.0h, 2.0h);
  vDir1.zw = IN.baseTC.xy;
  vDir2.zw = IN.baseTC.xy;

  // Narrow pair: centre -/+ unit direction * narrow step.
  half4 temp1N;
  temp1N.xy = vDir1.zw - vDir1.xy * vPixelSizes.zw;

  temp1N = tex2Dlod(_tex0, float4(temp1N.xy,0,0));
  half4 rgby1;
  rgby1.xy = vDir1.zw + vDir1.xy * vPixelSizes.zw;

  rgby1 = tex2Dlod(_tex0, float4(rgby1.xy,0,0));
  rgby1 = (temp1N + rgby1) * 0.5;

  // Wide pair: centre -/+ rescaled direction * wide step.
  half4 temp2N;
  temp2N.xy = vDir2.zw - vDir2.xy * vPixelSizes.xy;
  temp2N = tex2Dlod(_tex0, float4(temp2N.xy,0,0));

  half4 rgby2;
  rgby2.xy = vDir2.zw + vDir2.xy * vPixelSizes.xy;
  rgby2 = tex2Dlod(_tex0, float4(rgby2.xy,0,0));
  rgby2 = (temp2N + rgby2) * 0.5;

  // Four-tap result = mean of the wide and narrow pair averages.
  rgby2 = (rgby2 + rgby1) * 0.5;

  // Reject the four-tap result if its luminance left the corner range.
  bool twoTapLt = rgby2.w < fLumMin;
  bool twoTapGt = rgby2.w > fLumMax;

  if(twoTapLt || twoTapGt) rgby2 = rgby1;

  OUT.Color = rgby2;

  return OUT;
}

////////////////// technique /////////////////////

// Full FXAA with edge search (FXAA_VS + FXAA_PS), culling disabled.
technique FXAA
{
  pass p0
  {        
    CullMode = None;        
    VertexShader = CompileVS FXAA_VS();
	PixelShader = CompilePS FXAA_PS();
  }
}

// Cheap directional-blur FXAA (FXAA_VS + FXAAFast_PS), culling disabled.
technique FXAAFast
{ 
  pass p0
  {        
    CullMode = None;  
	VertexShader = CompileVS FXAA_VS();
	PixelShader = CompilePS FXAAFast_PS();
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Clear screen technique //////////////////////////////////////////////////////////////////////////

/// Specific data ////////////////////////

/// Constants ////////////////////////////

// Colour written to every pixel by ClearScreenPS.
float4 clrScrParams;

/// Samplers ////////////////////////////
// none

///////////////// vertex shader //////////////////

// Vertex output for the ClearScreen technique: position only, no texture coordinates.
struct vtxOutClrScr
{
  float4 HPosition  : POSITION;
};

// Vertex shader for ClearScreen: only transforms the position by vpMatrix.
vtxOutClrScr ClearScreenVS(vtxIn IN)
{
  vtxOutClrScr OUT = (vtxOutClrScr)0;

  const float4 vClipPos = mul(vpMatrix, IN.Position);
  OUT.HPosition = vClipPos;

  return OUT;
}

///////////////// pixel shader //////////////////
// Pixel shader for ClearScreen: outputs the constant colour clrScrParams.
pixout ClearScreenPS(vtxOutClrScr IN)
{
  pixout OUT;

  // No texture lookups - the whole target receives one colour.
  OUT.Color = clrScrParams;

  return OUT;
}

////////////////// technique /////////////////////
// Fills the target with clrScrParams (ClearScreenVS + ClearScreenPS), culling disabled.
technique ClearScreen
{
  pass p0
  {
    VertexShader = CompileVS ClearScreenVS();
    PixelShader = CompilePS ClearScreenPS();    
    CullMode = None;        
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Kawase Blur technique //////////////////////////////////////////////////////////////////////////

/// Specific data ////////////////////////

/// Constants ////////////////////////////

// Blur tap offsets.
// KawaseBlurVS adds blurParams0.xy, blurParams0.zw, blurParams1.xy and
// blurParams1.zw to the base UV (one float2 offset per tap).
// AnisotropicVerticalVS uses each of blurParams0.x/.y/.z/.w as a vertical
// offset; it does not read blurParams1.
float4 blurParams0;
float4 blurParams1;

/// Samplers ////////////////////////////

// Source texture for KawaseBlurPS and AnisotropicVerticalBlurPS, bound to sampler register s0.
sampler2D blurMap0 : register(s0);

///////////////// vertex shader //////////////////

// Vertex-to-pixel interpolants for the KawaseBlur technique.
struct vtxOutKawase
{
  float4 HPosition  : POSITION;
  // Unmodified base UV (centre tap).
  float2 baseTC0 : TEXCOORDN;    
  // Base UV + blurParams0.xy
  float2 baseTC1 : TEXCOORDN;    
  // Base UV + blurParams0.zw
  float2 baseTC2 : TEXCOORDN;    
  // Base UV + blurParams1.xy
  float2 baseTC3 : TEXCOORDN;    
  // Base UV + blurParams1.zw
  float2 baseTC4 : TEXCOORDN;    
};

// Vertex shader for KawaseBlur.
// Emits the base UV and four copies displaced by the offsets in blurParams0/1.
vtxOutKawase KawaseBlurVS(vtxIn IN)
{
  vtxOutKawase OUT = (vtxOutKawase)0;

  OUT.HPosition = mul(vpMatrix, IN.Position);

  // The middle pixel is sampled as well to keep some detail.
  OUT.baseTC0.xy = IN.baseTC.xy;

  // Four surrounding taps.
  OUT.baseTC1.xy = IN.baseTC.xy + blurParams0.xy;
  OUT.baseTC2.xy = IN.baseTC.xy + blurParams0.zw;
  OUT.baseTC3.xy = IN.baseTC.xy + blurParams1.xy;
  OUT.baseTC4.xy = IN.baseTC.xy + blurParams1.zw;

  return OUT;
}

///////////////// pixel shader //////////////////
// Pixel shader for KawaseBlur: equal-weight mean of the five taps from blurMap0.
pixout KawaseBlurPS(vtxOutKawase IN)
{
  pixout OUT;

  half4 cCentre = tex2D(blurMap0, IN.baseTC0.xy);
  half4 cTapA   = tex2D(blurMap0, IN.baseTC1.xy);
  half4 cTapB   = tex2D(blurMap0, IN.baseTC2.xy);
  half4 cTapC   = tex2D(blurMap0, IN.baseTC3.xy);
  half4 cTapD   = tex2D(blurMap0, IN.baseTC4.xy);

  // Sum in tap order, then divide by the tap count.
  OUT.Color = (cCentre + cTapA + cTapB + cTapC + cTapD) / 5.0;

  return OUT;
}

////////////////// technique /////////////////////
// Five-tap blur pass (KawaseBlurVS + KawaseBlurPS).
// Unlike most techniques in this file, this one culls back faces.
technique KawaseBlur
{
  pass p0
  {
    VertexShader = CompileVS KawaseBlurVS();
    PixelShader = CompilePS KawaseBlurPS();
    
    CullMode = Back;        
  }
}

// =================================================================================================
// Technique: GaussBlur/GaussBlurBilinear
// Description: Applies a separable vertical/horizontal gaussian blur filter
// =================================================================================================

// Gaussian tap tables shared by the Gauss* techniques below.
// The shaders in this section read only entries [0..7]: the .xy of each
// PI_psOffsets entry as a UV offset and the .x of each psWeights entry as the
// tap weight.
float4 PI_psOffsets[16];
float4 psWeights[16];

// Vertex-to-pixel interpolants for GaussBlurBilinear.
// For each tcN: .xy = base UV + PI_psOffsets[N].xy.
// When %_RT_SAMPLE0 is not set, .wz additionally holds the same tap with the
// offset scaled for the wider bloom blur (x stored in .w, y stored in .z).
struct vtxOutGauss
{
  float4 HPosition : POSITION;
  // Unmodified base UV.
  float2 baseTC : TEXCOORDN;  
  float4 tc0 : TEXCOORDN;    
  float4 tc1 : TEXCOORDN;    
  float4 tc2 : TEXCOORDN;    
  float4 tc3 : TEXCOORDN;   
  float4 tc4 : TEXCOORDN;    
  float4 tc5 : TEXCOORDN;    
  float4 tc6 : TEXCOORDN;    
  float4 tc7 : TEXCOORDN;  
};

// Vertex-to-pixel interpolants for MaskedGaussBlurBilinear and
// GaussBlurBilinearEncoded. For each tcN: .xy = base UV + PI_psOffsets[N].xy.
struct vtxOutGaussMasked
{
  float4 HPosition : POSITION;
  // .wz = unmodified base UV (x in .w, y in .z).
  float4 tc0 : TEXCOORDN;    
  // .wz = base UV rescaled around 0.5 by the aspect-ratio factor (x in .w, y in .z).
  float4 tc1 : TEXCOORDN;    
  float2 tc2 : TEXCOORDN;    
  float2 tc3 : TEXCOORDN;    
  float2 tc4 : TEXCOORDN;    
  float2 tc5 : TEXCOORDN;      
  float2 tc6 : TEXCOORDN;    
  float2 tc7 : TEXCOORDN;    
};

// Vertex shader for GaussBlurBilinear.
// Writes eight tap UVs (base UV + PI_psOffsets[0..7].xy) into tcN.xy. When
// %_RT_SAMPLE0 is not set it also writes a second set into tcN.wz whose
// offsets are multiplied by a screen-size dependent scale for a wider blur.
vtxOutGauss GaussBlurBilinearVS(vtxIn IN)
{
  vtxOutGauss OUT = (vtxOutGauss) 0;

  OUT.HPosition = mul(vpMatrix, IN.Position);

  OUT.baseTC.xy = IN.baseTC.xy;

  // Regular blur taps.
  OUT.tc0.xy = IN.baseTC.xy + PI_psOffsets[0].xy;
  OUT.tc1.xy = IN.baseTC.xy + PI_psOffsets[1].xy;
  OUT.tc2.xy = IN.baseTC.xy + PI_psOffsets[2].xy;
  OUT.tc3.xy = IN.baseTC.xy + PI_psOffsets[3].xy;
  OUT.tc4.xy = IN.baseTC.xy + PI_psOffsets[4].xy;
  OUT.tc5.xy = IN.baseTC.xy + PI_psOffsets[5].xy;
  OUT.tc6.xy = IN.baseTC.xy + PI_psOffsets[6].xy;
  OUT.tc7.xy = IN.baseTC.xy + PI_psOffsets[7].xy;

  #if !%_RT_SAMPLE0
    // Offset multiplier for the wider bloom taps.
    half2 vWideScale = 750.0f * ScrSize.zw * float2(0.75*(ScrSize.x/ScrSize.y), 1.0);

    // Wide taps go into .wz (x -> w, y -> z); the pixel shader reads them back as .wz.
    OUT.tc0.wz = IN.baseTC.xy + PI_psOffsets[0].xy * vWideScale;
    OUT.tc1.wz = IN.baseTC.xy + PI_psOffsets[1].xy * vWideScale;
    OUT.tc2.wz = IN.baseTC.xy + PI_psOffsets[2].xy * vWideScale;
    OUT.tc3.wz = IN.baseTC.xy + PI_psOffsets[3].xy * vWideScale;
    OUT.tc4.wz = IN.baseTC.xy + PI_psOffsets[4].xy * vWideScale;
    OUT.tc5.wz = IN.baseTC.xy + PI_psOffsets[5].xy * vWideScale;
    OUT.tc6.wz = IN.baseTC.xy + PI_psOffsets[6].xy * vWideScale;
    OUT.tc7.wz = IN.baseTC.xy + PI_psOffsets[7].xy * vWideScale;
  #endif

  return OUT;
}

// Vertex shader for MaskedGaussBlurBilinear and GaussBlurBilinearEncoded.
// Writes eight tap UVs (base UV + PI_psOffsets[0..7].xy) plus, in the .wz of
// tc0/tc1, the plain base UV and an aspect-corrected copy of it.
vtxOutGaussMasked MaskedGaussBlurBilinearVS(vtxIn IN)
{
  vtxOutGaussMasked OUT = (vtxOutGaussMasked) 0;

  OUT.HPosition = mul(vpMatrix, IN.Position);

  // Blur taps.
  OUT.tc0.xy = IN.baseTC.xy + PI_psOffsets[0].xy;
  OUT.tc1.xy = IN.baseTC.xy + PI_psOffsets[1].xy;
  OUT.tc2.xy = IN.baseTC.xy + PI_psOffsets[2].xy;
  OUT.tc3.xy = IN.baseTC.xy + PI_psOffsets[3].xy;
  OUT.tc4.xy = IN.baseTC.xy + PI_psOffsets[4].xy;
  OUT.tc5.xy = IN.baseTC.xy + PI_psOffsets[5].xy;
  OUT.tc6.xy = IN.baseTC.xy + PI_psOffsets[6].xy;
  OUT.tc7.xy = IN.baseTC.xy + PI_psOffsets[7].xy;

  // Unmodified UV, stored swizzled (x -> w, y -> z).
  OUT.tc0.wz = IN.baseTC.xy;

  // UV scaled around the screen centre (0.5) by the aspect-ratio factor, same swizzle.
  OUT.tc1.wz = (IN.baseTC.xy -0.5 ) * float2(0.75*(ScrSize.x/ScrSize.y), 1.0) + 0.5;

  return OUT;
}

// Pixel shader for GaussBlurBilinear.
//
// Computes the weighted sum of eight taps of _tex0 (weights psWeights[0..7].x)
// over all four channels. When %_RT_SAMPLE0 is not set, a second set of eight
// wider taps is added to the RGB sum and the RGB output becomes half of the
// combined sum; alpha keeps the result of the first eight taps.
//
// The previous version started with an extra fetch at IN.baseTC whose result
// was overwritten by the first tap before being used, so the fetch was dead
// and alpha was blurred exactly as it is here. That fetch has been removed.
// NOTE(review): the old comment claimed alpha stays unblurred for a skin
// mask; the code never did that - confirm which behaviour is wanted.
pixout GaussBlurBilinearPS(vtxOutGauss IN)
{
  pixout OUT;

  // First pass: regular blur taps, all channels.
  half4 col = tex2D(_tex0, IN.tc0.xy);
  half4 sum = col * (half) psWeights[0].x;

  col = tex2D(_tex0, IN.tc1.xy);
  sum += col * (half) psWeights[1].x;

  col = tex2D(_tex0, IN.tc2.xy);
  sum += col * (half) psWeights[2].x;

  col = tex2D(_tex0, IN.tc3.xy);
  sum += col * (half) psWeights[3].x;

  col = tex2D(_tex0, IN.tc4.xy);
  sum += col * (half) psWeights[4].x;

  col = tex2D(_tex0, IN.tc5.xy);
  sum += col * (half) psWeights[5].x;

  col = tex2D(_tex0, IN.tc6.xy);
  sum += col * (half) psWeights[6].x;

  col = tex2D(_tex0, IN.tc7.xy);
  sum += col * (half) psWeights[7].x;

  OUT.Color = sum;

  #if !%_RT_SAMPLE0
    // Second pass: wider taps for bloom, RGB only. The .rgb swizzle on col
    // makes the former implicit half4 -> half3 truncation explicit.
    col = tex2D(_tex0, IN.tc0.wz);
    sum.rgb += col.rgb * (half) psWeights[0].x;

    col = tex2D(_tex0, IN.tc1.wz);
    sum.rgb += col.rgb * (half) psWeights[1].x;

    col = tex2D(_tex0, IN.tc2.wz);
    sum.rgb += col.rgb * (half) psWeights[2].x;

    col = tex2D(_tex0, IN.tc3.wz);
    sum.rgb += col.rgb * (half) psWeights[3].x;

    col = tex2D(_tex0, IN.tc4.wz);
    sum.rgb += col.rgb * (half) psWeights[4].x;

    col = tex2D(_tex0, IN.tc5.wz);
    sum.rgb += col.rgb * (half) psWeights[5].x;

    col = tex2D(_tex0, IN.tc6.wz);
    sum.rgb += col.rgb * (half) psWeights[6].x;

    col = tex2D(_tex0, IN.tc7.wz);
    sum.rgb += col.rgb * (half) psWeights[7].x;

    // Two weighted sums were accumulated, so halve the result.
    OUT.Color.rgb = sum.rgb * 0.5;
  #endif

  return OUT;
}

// Pixel shader for MaskedGaussBlurBilinear.
// Each of the eight taps is first blended with the unblurred texel according
// to the mask (_tex1 .x, sampled at the aspect-corrected UV): mask 0 keeps the
// original texel, mask 1 keeps the tap. The blended taps are then summed with
// weights psWeights[0..7].x.
pixout MaskedGaussBlurBilinearPS(vtxOutGaussMasked IN)
{
  pixout OUT;

  half4 cAccum = 0;
  half4 cUnblurred = tex2D(_tex0, IN.tc0.wz);
  half fMask = tex2D(_tex1, IN.tc1.wz).x;

  half4 cTap = tex2D(_tex0, IN.tc0.xy);
  cTap = lerp(cUnblurred, cTap, fMask);
  cAccum += cTap * (half) psWeights[0].x;

  cTap = tex2D(_tex0, IN.tc1.xy);
  cTap = lerp(cUnblurred, cTap, fMask);
  cAccum += cTap * (half) psWeights[1].x;

  cTap = tex2D(_tex0, IN.tc2.xy);
  cTap = lerp(cUnblurred, cTap, fMask);
  cAccum += cTap * (half) psWeights[2].x;

  cTap = tex2D(_tex0, IN.tc3.xy);
  cTap = lerp(cUnblurred, cTap, fMask);
  cAccum += cTap * (half) psWeights[3].x;

  cTap = tex2D(_tex0, IN.tc4.xy);
  cTap = lerp(cUnblurred, cTap, fMask);
  cAccum += cTap * (half) psWeights[4].x;

  cTap = tex2D(_tex0, IN.tc5.xy);
  cTap = lerp(cUnblurred, cTap, fMask);
  cAccum += cTap * (half) psWeights[5].x;

  cTap = tex2D(_tex0, IN.tc6.xy);
  cTap = lerp(cUnblurred, cTap, fMask);
  cAccum += cTap * (half) psWeights[6].x;

  cTap = tex2D(_tex0, IN.tc7.xy);
  cTap = lerp(cUnblurred, cTap, fMask);
  cAccum += cTap * (half) psWeights[7].x;

  OUT.Color = cAccum;

  return OUT;
}

// Pixel shader for GaussBlurBilinearEncoded.
// Same eight-tap weighted sum as the plain Gauss blur, but every tap is passed
// through DecodeRGBS before weighting and the result is re-encoded with
// EncodeRGBS (fourth component of the encoder input set to 1).
pixout GaussBlurBilinearEncodedPS(vtxOutGaussMasked IN)
{
  pixout OUT;

  half3 cAccum = 0;

  half3 cTap = DecodeRGBS( tex2D(_tex0, IN.tc0.xy) );
  cAccum += cTap * (half) psWeights[0].x;

  cTap = DecodeRGBS( tex2D(_tex0, IN.tc1.xy) );
  cAccum += cTap * (half) psWeights[1].x;

  cTap = DecodeRGBS( tex2D(_tex0, IN.tc2.xy) );
  cAccum += cTap * (half) psWeights[2].x;

  cTap = DecodeRGBS( tex2D(_tex0, IN.tc3.xy) );
  cAccum += cTap * (half) psWeights[3].x;

  cTap = DecodeRGBS( tex2D(_tex0, IN.tc4.xy) );
  cAccum += cTap * (half) psWeights[4].x;

  cTap = DecodeRGBS( tex2D(_tex0, IN.tc5.xy) );
  cAccum += cTap * (half) psWeights[5].x;

  cTap = DecodeRGBS( tex2D(_tex0, IN.tc6.xy) );
  cAccum += cTap * (half) psWeights[6].x;

  cTap = DecodeRGBS( tex2D(_tex0, IN.tc7.xy) );
  cAccum += cTap * (half) psWeights[7].x;

  // Re-encode the blurred colour for output.
  OUT.Color = EncodeRGBS( float4( cAccum.xyz, 1) );

  return OUT;
}

// Optimized gauss blur version, making use of bilinear filtering
// Eight-tap weighted blur (GaussBlurBilinearVS + GaussBlurBilinearPS).
technique GaussBlurBilinear
{
  pass p0
  {
    VertexShader = CompileVS GaussBlurBilinearVS();
    PixelShader = CompilePS GaussBlurBilinearPS();    
  }
}

// Eight-tap weighted blur attenuated by a mask texture
// (MaskedGaussBlurBilinearVS + MaskedGaussBlurBilinearPS).
technique MaskedGaussBlurBilinear
{
  pass p0
  {
    VertexShader = CompileVS MaskedGaussBlurBilinearVS();
    PixelShader = CompilePS MaskedGaussBlurBilinearPS();    
  }
}

// Eight-tap weighted blur on RGBS-encoded input. Reuses
// MaskedGaussBlurBilinearVS for the tap coordinates, paired with
// GaussBlurBilinearEncodedPS.
technique GaussBlurBilinearEncoded
{
  pass p0
  {
    VertexShader = CompileVS MaskedGaussBlurBilinearVS();
    PixelShader = CompilePS GaussBlurBilinearEncodedPS();    
  }
}

// ===================================================================================================
// Technique: GaussAlphaBlur
// Description: Applies a separable vertical/horizontal gaussian blur filter for alpha channel only
// ===================================================================================================
// FIX:: optimize
// Vertex-to-pixel interpolants for GaussAlphaBlur.
// For each tcN: .xy = base UV + PI_psOffsets[N].xy.
struct vtxOutAlphaBlur
{
  float4 HPosition : POSITION;
  // .zw = unmodified base UV (used to fetch the unblurred RGB).
  float4 tc0 : TEXCOORDN;    
  float2 tc1 : TEXCOORDN;    
  float2 tc2 : TEXCOORDN;    
  float2 tc3 : TEXCOORDN;    
  float2 tc4 : TEXCOORDN;    
  float2 tc5 : TEXCOORDN;      
  float2 tc6 : TEXCOORDN;    
  float2 tc7 : TEXCOORDN;    
};

// Vertex shader for GaussAlphaBlur.
// Writes eight tap UVs (base UV + PI_psOffsets[0..7].xy) and keeps the plain
// base UV in tc0.zw.
vtxOutAlphaBlur GaussAlphaBlurVS(vtxIn IN)
{
  vtxOutAlphaBlur OUT = (vtxOutAlphaBlur) 0;

  OUT.HPosition = mul(vpMatrix, IN.Position);

  // Unmodified UV for the colour fetch.
  OUT.tc0.zw = IN.baseTC.xy;

  // Blur taps.
  OUT.tc0.xy = IN.baseTC.xy + PI_psOffsets[0].xy;
  OUT.tc1.xy = IN.baseTC.xy + PI_psOffsets[1].xy;
  OUT.tc2.xy = IN.baseTC.xy + PI_psOffsets[2].xy;
  OUT.tc3.xy = IN.baseTC.xy + PI_psOffsets[3].xy;
  OUT.tc4.xy = IN.baseTC.xy + PI_psOffsets[4].xy;
  OUT.tc5.xy = IN.baseTC.xy + PI_psOffsets[5].xy;
  OUT.tc6.xy = IN.baseTC.xy + PI_psOffsets[6].xy;
  OUT.tc7.xy = IN.baseTC.xy + PI_psOffsets[7].xy;

  return OUT;
}

// Pixel shader for GaussAlphaBlur.
// Output RGB is the unblurred _tex0 texel at the base UV; output alpha is the
// weighted sum (psWeights[0..7].x) of the alpha channel over the eight taps.
pixout GaussAlphaBlurPS(vtxOutAlphaBlur IN)
{
  pixout OUT;

  half fAlphaSum = 0;

  half fTapAlpha = tex2D(_tex0, IN.tc0.xy).a;
  fAlphaSum += fTapAlpha * (half) psWeights[0].x;

  fTapAlpha = tex2D(_tex0, IN.tc1.xy).a;
  fAlphaSum += fTapAlpha * (half) psWeights[1].x;

  fTapAlpha = tex2D(_tex0, IN.tc2.xy).a;
  fAlphaSum += fTapAlpha * (half) psWeights[2].x;

  fTapAlpha = tex2D(_tex0, IN.tc3.xy).a;
  fAlphaSum += fTapAlpha * (half) psWeights[3].x;

  fTapAlpha = tex2D(_tex0, IN.tc4.xy).a;
  fAlphaSum += fTapAlpha * (half) psWeights[4].x;

  fTapAlpha = tex2D(_tex0, IN.tc5.xy).a;
  fAlphaSum += fTapAlpha * (half) psWeights[5].x;

  fTapAlpha = tex2D(_tex0, IN.tc6.xy).a;
  fAlphaSum += fTapAlpha * (half) psWeights[6].x;

  fTapAlpha = tex2D(_tex0, IN.tc7.xy).a;
  fAlphaSum += fTapAlpha * (half) psWeights[7].x;

  // Colour passes through untouched; only alpha is blurred.
  OUT.Color.xyz = tex2D(_tex0, IN.tc0.zw).xyz;
  OUT.Color.a = fAlphaSum;

  return OUT;
}

// Alpha-only eight-tap weighted blur (GaussAlphaBlurVS + GaussAlphaBlurPS).
technique GaussAlphaBlur
{
  pass p0
  {
    VertexShader = CompileVS GaussAlphaBlurVS();
    PixelShader = CompilePS GaussAlphaBlurPS();    
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Anisotropic vertical blur technique ////////////////////////////////////////////////////////////

/// Specific data ////////////////////////

///////////////// vertex shader //////////////////

// Vertex-to-pixel interpolants for the AnisotropicVertical technique.
// baseTC0..3: base UV shifted in +v by blurParams0.x/.y/.z/.w * 0.125 * 0.75.
// baseTC4..7: base UV shifted in -v by blurParams0.x/.y/.z/.w * 0.75.
struct vtxOutAnisotropicVertical
{
  float4 HPosition  : POSITION;
  float2 baseTC0 : TEXCOORDN;    
  float2 baseTC1 : TEXCOORDN;    
  float2 baseTC2 : TEXCOORDN;    
  float2 baseTC3 : TEXCOORDN;    
  float2 baseTC4 : TEXCOORDN;    
  float2 baseTC5 : TEXCOORDN;    
  float2 baseTC6 : TEXCOORDN;    
  float2 baseTC7 : TEXCOORDN;    
};

// Vertex shader for AnisotropicVertical.
// Builds eight vertically displaced UVs from the four components of
// blurParams0. The +v taps use a step 8x shorter (extra factor 0.125) than the
// -v taps, so the kernel is deliberately asymmetric.
vtxOutAnisotropicVertical AnisotropicVerticalVS(vtxIn IN)
{
  vtxOutAnisotropicVertical OUT = (vtxOutAnisotropicVertical)0;

  OUT.HPosition = mul(vpMatrix, IN.Position);

  // Short taps in +v.
  OUT.baseTC0.xy = IN.baseTC.xy + float2(0, blurParams0.x) * 0.125 * 0.75f;
  OUT.baseTC1.xy = IN.baseTC.xy + float2(0, blurParams0.y) * 0.125 * 0.75f;
  OUT.baseTC2.xy = IN.baseTC.xy + float2(0, blurParams0.z) * 0.125 * 0.75f;
  OUT.baseTC3.xy = IN.baseTC.xy + float2(0, blurParams0.w) * 0.125 * 0.75f;

  // Long taps in -v.
  OUT.baseTC4.xy = IN.baseTC.xy - float2(0, blurParams0.x) * 0.75f;
  OUT.baseTC5.xy = IN.baseTC.xy - float2(0, blurParams0.y) * 0.75f;
  OUT.baseTC6.xy = IN.baseTC.xy - float2(0, blurParams0.z) * 0.75f;
  OUT.baseTC7.xy = IN.baseTC.xy - float2(0, blurParams0.w) * 0.75f;

  return OUT;
}

///////////////// pixel shader //////////////////
// Pixel shader for AnisotropicVertical: equal-weight mean of the eight
// vertically displaced taps of blurMap0.
pixout AnisotropicVerticalBlurPS(vtxOutAnisotropicVertical IN)
{
  pixout OUT;

  // Accumulate the taps in interpolant order.
  float4 cAccum = tex2D(blurMap0, IN.baseTC0.xy);
  cAccum += tex2D(blurMap0, IN.baseTC1.xy);
  cAccum += tex2D(blurMap0, IN.baseTC2.xy);
  cAccum += tex2D(blurMap0, IN.baseTC3.xy);
  cAccum += tex2D(blurMap0, IN.baseTC4.xy);
  cAccum += tex2D(blurMap0, IN.baseTC5.xy);
  cAccum += tex2D(blurMap0, IN.baseTC6.xy);
  cAccum += tex2D(blurMap0, IN.baseTC7.xy);

  // Divide by the tap count.
  OUT.Color = cAccum / 8.0;

  return OUT;
}

////////////////// technique /////////////////////
// Eight-tap vertical blur (AnisotropicVerticalVS + AnisotropicVerticalBlurPS).
// Culls back faces, like KawaseBlur.
technique AnisotropicVertical
{
  pass p0
  {
    VertexShader = CompileVS AnisotropicVerticalVS();
    PixelShader = CompilePS AnisotropicVerticalBlurPS();
    
    CullMode = Back;        
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Dilate technique for sprites ///////////////////////////////////////////////////////////////////

/// Specific data ////////////////////////

// Constants for the Dilate technique.
// vPixelOffset.xy scales the integer kernel offsets in DilatePS; vPixelOffset.zw
// is added to a UV there to fetch the "sky contribution" texel.
// vDilateParams.x multiplies the final colour in DilatePS; .yzw are not read there.
float4 vPixelOffset;			// PS 1/width,1/height,?,?
float4 vDilateParams;			// PS brightness_multiplier,?,?,?

/// Constants ///////////////////////////

////////////////// samplers /////////////////////

///////////////// vertex shader //////////////////

// Vertex input for DilateVS, assembled from the IN_P / IN_TBASE / IN_C0 macros
// defined outside this file section. DilateVS reads IN.Position and IN.baseTC.
// NOTE(review): IN_C0 presumably declares a vertex colour; it is not read by DilateVS.
struct vtxInDilate
{
  IN_P
  IN_TBASE
  IN_C0
};

// Vertex-to-pixel interpolants for the Dilate technique.
struct vtxOutDilate
{
  float4 HPosition  : POSITION;
  // Base UV nudged by +0.00001 on both axes (see DilateVS).
  float2 baseTC : TEXCOORD0;    
};

// Vertex shader for Dilate: transforms the position and forwards the UV with
// a tiny positive bias.
vtxOutDilate DilateVS(vtxInDilate IN)
{
  vtxOutDilate OUT = (vtxOutDilate)0;

  OUT.HPosition = mul(vpMatrix, IN.Position);

  // Bias the lookup towards the middle of the texel - fixes white spots on DX10.
  OUT.baseTC.xy = IN.baseTC.xy;
  OUT.baseTC.xy += 0.00001f;

  return OUT;
}


///////////////// pixel shader //////////////////
// Pixel shader for the sprite Dilate technique.
//
// Scans a neighbourhood of the current texel (8 neighbours, or 20 when the
// shader quality is above QUALITY_LOW) and keeps the LAST neighbour in kernel
// order whose alpha is > 0; if none qualifies the centre texel is kept. The
// chosen "sun" texel and the "sky" texel at the same position + vPixelOffset.zw
// are added, scaled by vDilateParams.x and written to RGB.
// Output alpha is 0 where the centre texel's alpha is 0, otherwise
// 1 - Sun/(Sun+Sky) * (1 - SpriteAlphaRef), using the largest RGB component of
// each term.
//
// Compared with the previous version, an unused texture fetch (the sky texel
// at the centre position) and an output store that was always overwritten
// have been removed; the result is unchanged.
pixout DilatePS(vtxOutDilate IN)
{
  pixout OUT;

  // Offsets in texels: 4 direct + 4 diagonal neighbours, then 12 on the next ring.
  const half2 Kernel_Neighbors[8+12] =
  {
    -1.0f,0.0f,
    1.0f,0.0f,
    0.0f,-1.0f,
    0.0f,1.0f,

    -1.0f,-1.0f,
    -1.0f,1.0f,
    1.0f,-1.0f,
    1.0f,1.0f,

    -2.0f,0.0f,
    2.0f,0.0f,
    0.0f,-2.0f,
    0.0f,2.0f,

    -2.0f,1.0f,
    2.0f,1.0f,
    1.0f,-2.0f,
    1.0f,2.0f,

    -2.0f,-1.0f,
    2.0f,-1.0f,
    -1.0f,-2.0f,
    -1.0f,2.0f,
  };

  float4 cBase0 = tex2D(_tex0, IN.baseTC.xy);   // sun contribution at the centre

  half4 cColor0 = cBase0;                       // best sun contribution found so far

  float2 vBestOffset = IN.baseTC.xy;

  // NOTE(review): this attribute precedes a declaration, not the loop below,
  // and is guarded with #ifdef whereas FXAA_PS uses #if D3D10 - confirm the
  // intended placement before moving it.
#ifdef D3D10
  [unroll]
#endif

  int iSampleCount=8;

  if( GetShaderQuality() > QUALITY_LOW )
    iSampleCount=8+12;

  for(int i=0;i<iSampleCount;i++)
  {
    float2 vLocalOffset = IN.baseTC.xy+Kernel_Neighbors[i].xy*vPixelOffset.xy;
    half4 cVal0 = tex2D(_tex0, vLocalOffset);   // sun contribution

    // Later matches overwrite earlier ones.
    if (cVal0.a > 0.0f)
    {
      cColor0 = cVal0;
      vBestOffset = vLocalOffset;
    }
  }

  half4 cColor1 = tex2D(_tex0, vBestOffset + vPixelOffset.zw);   // sky contribution

  OUT.Color = cColor0+cColor1;

  // Sun/(Sun+Sky), measured on the brightest colour channel of each.
  // NOTE(review): evaluates 0/0 when sun and sky are both black - confirm
  // whether such texels can carry alpha > 0.
  half fContribution = max(cColor0.r,max(cColor0.g,cColor0.b)) / max(OUT.Color.r,max(OUT.Color.g,OUT.Color.b));

  OUT.Color *= vDilateParams.x;   // adjust HDR values to LDR range

  const half SpriteAlphaRef=0.1;

  // Only texels that were covered originally receive a non-zero alpha.
  OUT.Color.a = (cBase0.a>0.0f) ? 1.0f-fContribution*(1.0-SpriteAlphaRef) : 0;

  return OUT;
}

////////////////// technique /////////////////////

// Sprite dilation pass (DilateVS + DilatePS), culling disabled.
technique Dilate
{
  pass p0
  {
    VertexShader = CompileVS DilateVS();            
    PixelShader = CompilePS DilatePS();
    CullMode = None;        
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Color correction technique /////////////////////////////////////////////////////////////////////

/// Constants ////////////////////////////

// Colour transform used by ColorCorrectionPS. Rows 0..2 are each dotted with
// (r, g, b, 1) to produce the output r, g and b; row 3 is not read there.
float4x4 mColorMatrix;

///////////////// pixel shader //////////////////

// Color correction pixel shader.
// Samples _tex0 at IN.baseTC.xy, extends the color to (r, g, b, 1) and transforms it with
// the first three rows of mColorMatrix (saturation / brightness / contrast adjustment).
// Output alpha is always 1.
pixout ColorCorrectionPS(vtxOut IN)
{
  pixout OUT;

  // w = 1 lets the fourth element of each matrix row act as a constant offset.
  half4 cScene = half4(tex2D(_tex0, IN.baseTC.xy).xyz, 1);

  // One dot product per output channel; all three read the untransformed color.
  float3 vCorrected;
  vCorrected.x = dot(cScene.xyzw, mColorMatrix[0].xyzw);
  vCorrected.y = dot(cScene.xyzw, mColorMatrix[1].xyzw);
  vCorrected.z = dot(cScene.xyzw, mColorMatrix[2].xyzw);
  cScene.xyz = vCorrected;

  // Gamma adjustment is intentionally not applied here (it was disabled in the original code).

  OUT.Color = cScene;
  return OUT;
}

////////////////// technique /////////////////////

// Single-pass technique: BaseVS feeds ColorCorrectionPS, with face culling disabled.
technique ColorCorrection
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS ColorCorrectionPS();    
    CullMode = None;        
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Image blurring techniques //////////////////////////////////////////////////////////////////////

///////////////// pixel shader //////////////////

// Blends a blurred image (_tex1) with the sharp image (_tex0).
// psParams[0].w is the interpolation factor: 0 outputs the blurred color, 1 the sharp color.
pixout BlurInterpolationPS(vtxOut IN)
{
  pixout OUT;

  const float2 tcScreen = IN.baseTC.xy;

  half4 cSharp = tex2D( _tex0, tcScreen );
  half4 cSoft = tex2D( _tex1, tcScreen );

  OUT.Color = lerp(cSoft, cSharp, psParams[0].w);
  return OUT;
}

////////////////// technique /////////////////////

// Single-pass technique: BaseVS feeds BlurInterpolationPS, with face culling disabled.
technique BlurInterpolation
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS BlurInterpolationPS();    
    CullMode = None;        
  }
}


////////////////////////////////////////////////////////////////////////////////////////////////////
/// Masked Image blurring techniques //////////////////////////////////////////////////////////////////////

///////////////// pixel shader //////////////////

// Blends a blurred image (_tex1) with the sharp image (_tex0), modulated by a mask.
// The mask is the square root of the red channel of _tex2, sampled at IN.baseTC.wz
// (note the swizzled second coordinate set). The final interpolation factor is
// mask * psParams[0].w: 0 outputs the blurred color, 1 the sharp color.
pixout MaskedBlurInterpolationPS(vtxOut IN)
{
  pixout OUT;

  const float2 tcScreen = IN.baseTC.xy;

  half4 cSharp = tex2D( _tex0, tcScreen );
  half4 cSoft = tex2D( _tex1, tcScreen );

  half fMaskRaw = tex2D( _tex2, IN.baseTC.wz ).x;
  half fMask = sqrt( fMaskRaw );

  OUT.Color = lerp(cSoft, cSharp, fMask * psParams[0].w);
  return OUT;
}

////////////////// technique /////////////////////

// Single-pass technique: BaseVS feeds MaskedBlurInterpolationPS, with face culling disabled.
technique MaskedBlurInterpolation
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS MaskedBlurInterpolationPS();    
    CullMode = None;        
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Radial blurring technique //////////////////////////////////////////////////////////////////////

/// Constants ////////////////////////////

// xy = radial center position in screen (texture coordinate) space,
// z  = radius attenuation (used as an inverse radius: distance is multiplied by it),
// w  = blur strength
float4 vRadialBlurParams;

///////////////// pixel shader //////////////////

// Radial blur pixel shader.
// Averages 8 samples of _tex0 taken along the line from the current pixel towards the
// radial center (vRadialBlurParams.xy). The step length is scaled by the blur strength
// (vRadialBlurParams.w) and by a squared falloff that reaches zero once the pixel is
// 1 / vRadialBlurParams.z away from the center.
pixout RadialBlurringPS(vtxOut IN)
{
  pixout OUT;

  const float2 vCenter = vRadialBlurParams.xy;

  // Vector from this pixel to the blur center.
  const float2 vBlurVec = ( vCenter.xy - IN.baseTC.xy );

  // 1 at the center, 0 at (and beyond) the attenuation radius.
  const float fInvRadius = vRadialBlurParams.z;
  const float fFalloff = saturate( 1 - dot( vBlurVec.xy * fInvRadius, vBlurVec.xy * fInvRadius ) );

  // Effective strength is kept in a local: the original wrote the result back into the
  // global constant vRadialBlurParams.w, which only compiles in legacy compatibility mode.
  const float fBlurStrength = vRadialBlurParams.w * ( fFalloff * fFalloff );

  const int nSamples = 8;
  const float fWeight = 1.0 / (float) nSamples;

  half4 cAccum = 0;
  for(int i = 0; i < nSamples; i++)
  {
    half4 cCurr = tex2D(_tex0, (IN.baseTC.xy + vBlurVec.xy * i * fBlurStrength) );
    cAccum += cCurr;
  }

  // Uniform average of all taps.
  OUT.Color = cAccum * fWeight;

  return OUT;
}
////////////////// technique /////////////////////

// Single-pass technique: BaseVS feeds RadialBlurringPS, with face culling disabled.
technique RadialBlurring
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS RadialBlurringPS();    
    CullMode = None;        
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Motion Blur technique //////////////////////////////////////////////////////////////////////////

/// Specific data ////////////////////////

// Vertex-to-pixel interpolants for the camera motion blur pass (filled by MotionBlurDisplVS).
struct vtxOutMotionBlurDispl
{
  // Clip-space position of the vertex for the current frame.
  float4 HPosition  : POSITION;
  // HPosToScreenTC(HPosition); sampled with tex2Dproj in the pixel shaders.
  float4 tcProj     : TEXCOORDN;
  // HPosToScreenTC of the current-frame clip position; pixel shaders divide xy by w.
  float4 vVelocity  : TEXCOORDN;
  // HPosToScreenTC of the same vertex transformed by mViewProjPrev; pixel shaders divide xy by w.
  float4 vVelocityPrev  : TEXCOORDN;
};

/// Constants ////////////////////////////

float4x4 mViewProj : PI_Composite;  // ( view projection matrix )
float4x4 mViewProjI : PB_UnProjMatrix;  // invert( view projection matrix )
// Applied to the vertex position in MotionBlurDisplVS to obtain the "previous" clip position
// (presumably last frame's view projection matrix - confirm against the engine code that sets it).
float4x4 mViewProjPrev;

// Per-pass parameters; the meaning of each component depends on the shader reading it:
//   .x  - velocity scale in the object motion blur shaders
//   .xy - texel offset in GetResampledOrigDepth
//   .zw - texel offset in OMB_VelocityIDRescalePS
//   .w  - velocity scale in MotionBlurDisplPS / MotionBlurDisplHDRPS
float4 PI_motionBlurParams;

// .w * 5 is used as the "rotation amount" term in the depth mask generation shaders.
float4 motionBlurParams;
// Not referenced by the shaders in this part of the file.
float4 motionBlurChromaParams;
// Only referenced from commented-out code in MotionBlurDisplVS.
float4 motionBlurCamParams;
// .xy (scaled by PI_motionBlurParams.w) is added to the camera blur vectors in MotionBlurDispl*PS.
float4 vDirectionalBlur;

/// Samplers ////////////////////////////

// Bound to sampler register s1; not referenced by the shaders in this part of the file.
sampler2D motionBlurMaskMap : register(s1);

///////////////// vertex shaders //////////////////

// Vertex shader for the camera motion blur pass.
// Every input vertex is normalized and pushed out to a fixed distance of 25 units, then
// offset by g_VS_WorldViewPos (presumably the camera position, so the mesh forms a sphere
// around the viewer). The resulting point is projected with the current matrix (vpMatrix)
// and with mViewProjPrev; both projections are converted with HPosToScreenTC and passed on
// so the pixel shader can derive a per-pixel blur vector.
vtxOutMotionBlurDispl MotionBlurDisplVS(vtxIn IN)
{
  vtxOutMotionBlurDispl OUT = (vtxOutMotionBlurDispl)0;

  // Sphere radius is hard-coded; it influences the perceived blur strength.
  float4 vSpherePos = IN.Position;
  vSpherePos.xyz = normalize(vSpherePos.xyz) * 25;
  vSpherePos.xyz += g_VS_WorldViewPos.xyz;

  float4 vClipCurr = mul(vpMatrix, vSpherePos);
  float4 vClipPrev = mul(mViewProjPrev, vSpherePos);

  OUT.HPosition = vClipCurr;

  // Current and previous screen-space positions of the same sphere point.
  OUT.vVelocity = HPosToScreenTC( vClipCurr );
  OUT.vVelocityPrev = HPosToScreenTC( vClipPrev );

  // Projective coordinates for fetching the screen/depth maps at this pixel.
  OUT.tcProj = HPosToScreenTC( vClipCurr );

  return OUT;
}

///////////////// pixel shaders //////////////////

// Generates the camera motion blur mask (LDR path).
// RGB is a copy of the screen map; alpha receives the blur mask:
//   saturate(d - 1) * saturate(exp(-d)^2 + motionBlurParams.w * 5)
// where d = depth * PS_NearFarClipDist.y (presumably depth in world units - confirm).
// Pixels with d <= 1 therefore always get a mask of 0.
pixout MotionBlurdDepthMaskPS(vtxOut IN)
{
  pixout OUT = (pixout)0;

  float fSceneDepth = GetDepthMap(depthMapSampler, IN.baseTC);

  // Exponential falloff with distance, squared.
  half fFalloff = exp(-fSceneDepth * PS_NearFarClipDist.y);
  fFalloff *= fFalloff;

  float fRotationAmount = (motionBlurParams.w * 5.0);

  // Zero for the nearest geometry, then weighted by falloff + rotation term.
  half fMask = ( fSceneDepth * PS_NearFarClipDist.y );
  fMask = saturate( fMask - 1.0 ) * saturate( fFalloff + fRotationAmount );

  OUT.Color.xyz = tex2D(screenMapSampler, IN.baseTC).xyz;
  OUT.Color.w = fMask; // mask is stored in the screen map alpha

  return OUT;
}

// Generates the camera motion blur mask (HDR path).
// Color comes from _tex0, depth from _tex1. Alpha is overwritten with the blur mask:
//   saturate(d - 1) * saturate(exp(-d)^2 + motionBlurParams.w * 5)
// where d = depth * PS_NearFarClipDist.y (presumably depth in world units - confirm).
pixout MotionBlurDepthMaskHDRPS(vtxOut IN)
{
  pixout OUT = (pixout)0;

  OUT.Color = tex2D(_tex0, IN.baseTC);

  float fSceneDepth = GetDepthMap(_tex1, IN.baseTC);

  // Exponential falloff with distance, squared.
  half fFalloff = exp(-fSceneDepth * PS_NearFarClipDist.y);
  fFalloff *= fFalloff;

  float fRotationAmount = (motionBlurParams.w * 5.0);

  // Zero for the nearest geometry, then weighted by falloff + rotation term.
  half fMask = ( fSceneDepth * PS_NearFarClipDist.y );
  fMask = saturate( fMask - 1.0 ) * saturate( fFalloff + fRotationAmount );

  OUT.Color.w = fMask; // mask is stored in the output alpha

  return OUT;
}

// Fetches the velocity stored in the xy channels of sVelocity at tc (mip level 0).
// The value is returned as stored; no decoding or length scaling is applied
// (the unused "decoded length" local of the original has been removed).
float2 GetVelocity( sampler2D sVelocity, float2 tc )
{
  return tex2Dlod(sVelocity, float4(tc.xy, 0, 0)).xy;
}

// Object motion blur, brute-force variant.
// Inputs as used below: _tex0 = scene color, _tex1 = velocity, _tex2 = depth (.x).
// For each of 8 Poisson-distributed neighbours that is not farther away than the centre
// pixel and has a non-zero velocity, 8 color samples are taken along that neighbour's
// velocity (centred on the current pixel). The average of all samples is blended over the
// original color; the blend factor is 3x the average velocity-texture magnitude found at
// the sample positions, saturated. Output alpha is 1.
pixout MotionBlurObjectPS(vtxOut IN)
{
  pixout OUT = (pixout)0;

  float2 vPoissonTaps[8] = {
    float2( 0.0,      0.0),
    float2( 0.527837,-0.085868),
    float2(-0.040088, 0.536087),
    float2(-0.670445,-0.179949),
    float2(-0.419418,-0.616039),
    float2( 0.440453,-0.639399),
    float2(-0.757088, 0.349334),
    float2( 0.574619, 0.685879)
  };

  const float2 tcCenter = IN.baseTC.xy;
  const float4 tcCenterLod = float4(tcCenter, 0, 0);

  float4 cScene = tex2Dlod(_tex0, tcCenterLod);
  float4 cDummyFetchDx10 = tex2Dlod(_tex1, tcCenterLod); // dummy fetch for dx10 samplers order declaration workaround
  float fCenterDepth = tex2Dlod(_tex2, tcCenterLod).x;

  const int nNeighbourTaps = 8;
  const int nBlurTaps = 8;

  // Neighbourhood radius shrinks with the squared inverse depth of the centre pixel.
  const float fDepthFade = saturate( (1 - fCenterDepth) * (1 - fCenterDepth) );

  float4 vAccum = 0;      // xyz = summed color, w = summed velocity magnitude
  int nAccumulated = 0;   // number of color samples in vAccum

#if D3D10
  [unroll]
#endif
  for(int n = 0; n < nNeighbourTaps; n++)
  {
    // todo (from original): this must scale depending on camera distance or object size on screen
    const float2 tcTap = tcCenter + vPoissonTaps[n] * 0.0333 * fDepthFade;

    // Only neighbours at the same depth or closer may smear over this pixel.
    const float fTapDepth = tex2Dlod(_tex2, float4(tcTap, 0, 0)).x;
    if( !( fTapDepth > fCenterDepth ) )
    {
      const float2 vTapVelocity = GetVelocity(_tex1, tcTap);

      half fSpeedSq = dot(vTapVelocity.xy, vTapVelocity.xy);
      if( fSpeedSq )
      {
#if D3D10
        [unroll]
#endif
        for(float i = 0; i < nBlurTaps; i++)
        {
          // Walk from -0.5 to +0.375 of the scaled velocity around the centre pixel.
          const float2 tcLookup = vTapVelocity * ((i / nBlurTaps) - 0.5) * PI_motionBlurParams.x + tcCenter;
          const float4 tcLookupLod = float4(tcLookup.xy, 0, 0);

          float4 cTap = tex2Dlod(_tex0, tcLookupLod);
          float4 vTapVel = tex2Dlod(_tex1, tcLookupLod);
          half fBlend = ( length(vTapVel) );

          vAccum.xyz += cTap.xyz;
          vAccum.w += fBlend;
          nAccumulated++;
        }
      }
    }
  }

  OUT.Color = float4( cScene.xyz, 1 );
  if( nAccumulated )
  {
    // Blend the average sample color over the scene by the average velocity magnitude.
    float fLerp = vAccum.w / (float)nAccumulated;
    OUT.Color.xyz = lerp(cScene.xyz, vAccum.xyz / (float)nAccumulated, saturate(fLerp * 3));
  }

  return OUT;
}

// Builds the mask consumed by MotionBlurObjectUsingMaskPS.
// _tex0 holds velocity in xy. Output channels:
//   x = 1 when the centre pixel itself has non-zero velocity ("inside" case)
//   y = 1 when the centre is static but any of 7 Poisson neighbours has velocity ("border" case)
//   w = 1 when the relevant squared velocity exceeds fMinVelocityThreshold
// Changes versus the original (results are the same):
//   - the neighbourhood scale is computed into a local instead of multiplying the global
//     PS_ScreenSize.zw in place (globals are shader constants and should not be written);
//   - threshold comparisons are done on scalars instead of implicitly truncated 2-vectors;
//   - the unreachable trailing return and the unused Blurred / pixelVelocity locals were removed.
pixout MotionBlurObjectMaskPS(vtxOut IN)
{
  pixout OUT = (pixout)0;

  float4 OriginalUV = IN.baseTC;

  float2 poisson[7] = {
    float2( 0.527837,-0.085868),
    float2(-0.040088, 0.536087),
    float2(-0.670445,-0.179949),
    float2(-0.419418,-0.616039),
    float2( 0.440453,-0.639399),
    float2(-0.757088, 0.349334),
    float2( 0.574619, 0.685879)
  };

  half2 cOrigVelocity = tex2Dlod(_tex0, float4(IN.baseTC.xy, 0, 0)).xy;

  // Not used by the math below. Kept so _tex1 stays referenced - other shaders in this file
  // keep a similar dummy fetch as a DX10 sampler-order workaround. TODO confirm it is required here.
  float fOrigDepth = tex2Dlod(_tex1, float4(IN.baseTC.xy, 0, 0)).x;

  const int nSamples = 7;

  // Neighbourhood radius: 100x PS_ScreenSize.zw (replaces an older hard-coded 0.0333 scale).
  const half fOffsetRange = 100.0;
  const half2 vOffsetScale = PS_ScreenSize.zw * fOffsetRange;

  const half fMinVelocityThreshold = 0.0001;

  half fCenterVelocity = dot( cOrigVelocity.xy, cOrigVelocity.xy );

  if( fCenterVelocity ) // Inside case
  {
    OUT.Color.x = 1;
    OUT.Color.w = fCenterVelocity > fMinVelocityThreshold; // set second pass mask
    return OUT;
  }

  // Borders case: accumulate the squared velocity of the surrounding pixels.
#if D3D10
  [unroll]
#endif
  for(int n = 0; n < nSamples; n++)
  {
    // todo (from original): this must scale depending on camera distance or object size on screen
    float2 vOffset = poisson[n] * vOffsetScale;

    half2 vNeighbourVelocity = tex2Dlod(_tex0, float4(OriginalUV.xy + vOffset, 0, 0)).xy;
    half fLen = dot(vNeighbourVelocity.xy, vNeighbourVelocity.xy);
    OUT.Color.y += fLen;
  }

  OUT.Color.y = OUT.Color.y / (float) nSamples;
  OUT.Color.w = OUT.Color.y > fMinVelocityThreshold; // set second pass mask
  OUT.Color.y = (OUT.Color.y > 0.0);

  return OUT;
}

// Object motion blur driven by a precomputed mask (preliminary optimization of MotionBlurObjectPS).
// Inputs as used below: _tex0 = scene color, _tex1 = velocity, _tex2 = depth (.x), _tex3 = mask.
//   mask.x + mask.y == 0 : pixel is untouched, the scene color is returned as is.
//   mask.x != 0          : pixel is inside a moving mesh - 8 samples along its own velocity
//                          are averaged and returned with alpha 1.
//   otherwise            : pixel is on a mesh edge - up to 7 Poisson neighbours (not farther
//                          than the centre, with non-zero velocity) each contribute 8 samples
//                          along their velocity; the average is blended over the scene color.
pixout MotionBlurObjectUsingMaskPS(vtxOut IN)
{
  pixout OUT = (pixout)0;

  float2 vPoissonTaps[7] = {
    float2( 0.527837,-0.085868),
    float2(-0.040088, 0.536087),
    float2(-0.670445,-0.179949),
    float2(-0.419418,-0.616039),
    float2( 0.440453,-0.639399),
    float2(-0.757088, 0.349334),
    float2( 0.574619, 0.685879)
  };

  const float2 tcCenter = IN.baseTC.xy;
  const float4 tcCenterLod = float4(tcCenter, 0, 0);

  float4 cScene = tex2Dlod(_tex0, tcCenterLod);
  float4 cDummyFetchDx10 = tex2Dlod(_tex1, tcCenterLod); // dummy fetch for dx10 samplers order declaration workaround
  float fCenterDepth = tex2Dlod(_tex2, tcCenterLod).x;
  half4 cMask = tex2Dlod(_tex3, tcCenterLod).xyzw;

  OUT.Color = cScene;

  // Neither inside nor on the border of a moving object: nothing to blur.
  if( dot( cMask.xy, 1) == 0.0 )
    return OUT;

  const int nBlurTaps = 8;
  const int nEdgeTaps = 7;
  const float fRecipBlurTaps = 1.0 / (float)nBlurTaps;

  float4 vAccum = 0;   // xyz = summed color, w = summed squared velocity

  if( cMask.x )
  {
    // Inside the mesh: regular blur along this pixel's own (scaled) velocity.
    const float2 vOwnVelocity = GetVelocity(_tex1, tcCenter) * PI_motionBlurParams.x;
  #if D3D10
    [unroll]
  #endif
    for(float i = 0; i < nBlurTaps; i++)
    {
      const float2 tcLookup = vOwnVelocity * ((i * fRecipBlurTaps) - 0.5) + tcCenter;
      vAccum.xyz += tex2Dlod(_tex0, float4(tcLookup.xy, 0, 0)).xyz;
    }

    OUT.Color = half4(vAccum.xyz * fRecipBlurTaps, 1);
    return OUT;
  }

  // Mesh edge: gather blur from the moving neighbours.
  // Neighbourhood radius shrinks with the squared inverse depth of the centre pixel.
  const float fDepthFade = saturate( (1 - fCenterDepth) * (1 - fCenterDepth) );
  float fSampleCount = 0;

  #if D3D10
    [unroll]
  #endif
  for(int n = 0; n < nEdgeTaps; n++)
  {
    // todo (from original): this must scale depending on camera distance or object size on screen
    const float2 tcTap = tcCenter + vPoissonTaps[n] * 0.0333 * fDepthFade;

    // Only neighbours at the same depth or closer may smear over this pixel.
    const float fTapDepth = tex2Dlod(_tex2, float4(tcTap, 0, 0)).x;
    if( !( fTapDepth > fCenterDepth ) )
    {
      const float2 vTapVelocity = GetVelocity(_tex1, tcTap);

      half fSpeedSq = dot(vTapVelocity.xy, vTapVelocity.xy);
      if( fSpeedSq )
      {
  #if D3D10
        [unroll]
  #endif
        for(float i = 0; i < nBlurTaps; i++)
        {
          const float2 tcLookup = vTapVelocity * ((i * fRecipBlurTaps) - 0.5) * PI_motionBlurParams.x + tcCenter;
          const float4 tcLookupLod = float4(tcLookup.xy, 0, 0);

          float3 cTap = tex2Dlod(_tex0, tcLookupLod).xyz;
          float4 vTapVel = tex2Dlod(_tex1, tcLookupLod);
          half fBlend = ( dot(vTapVel, vTapVel) );

          vAccum += half4(cTap.xyz, fBlend);
        }

        fSampleCount += nBlurTaps;
      }
    }
  }

  if( fSampleCount )
  {
    // Blend the average sample color over the scene by 3x the average squared velocity.
    half fLerp = vAccum.w / fSampleCount;
    OUT.Color.xyz = lerp(cScene.xyz, vAccum.xyz / fSampleCount, saturate(fLerp * 3));
  }

  return OUT;
}

////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////

// Rescale pass for the velocity / ID buffer in _tex0.
// Reads the centre texel and its four diagonal neighbours (offset by PI_motionBlurParams.zw)
// and outputs, unchanged, whichever sample has the largest .z ("maximum depth" per the
// original author's note). On ties the later sample wins.
pixout OMB_VelocityIDRescalePS(vtxOut IN)
{
  pixout OUT = (pixout)0;

  const float2 tcCenter = IN.baseTC.xy;
  const float2 vTexel = PI_motionBlurParams.zw;

  float4 vBest   = tex2D(_tex0, tcCenter);
  float4 vDiagPP = tex2D(_tex0, tcCenter + float2(1,1) * vTexel);
  float4 vDiagNN = tex2D(_tex0, tcCenter - float2(1,1) * vTexel);
  float4 vDiagNP = tex2D(_tex0, tcCenter + float2(-1,1)* vTexel);
  float4 vDiagPN = tex2D(_tex0, tcCenter + float2(1,-1)* vTexel);

  // Keep the current best only when it is strictly greater than the candidate.
  if( !( vBest.z > vDiagPP.z ) ) vBest = vDiagPP;
  if( !( vBest.z > vDiagNN.z ) ) vBest = vDiagNN;
  if( !( vBest.z > vDiagNP.z ) ) vBest = vDiagNP;
  if( !( vBest.z > vDiagPN.z ) ) vBest = vDiagPN;

  OUT.Color = vBest;

  return OUT;
}

////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////

// Returns the maximum of the .x channel of _tex1 over the texel at tc and its four
// diagonal neighbours. The neighbour offset is PI_motionBlurParams.xy.
float GetResampledOrigDepth( float2 tc )
{
  const float2 vTexel = PI_motionBlurParams.xy;

  float fMaxDepth = tex2Dlod(_tex1, float4(tc.xy, 0, 0)).x;

  const float fDiagPP = tex2Dlod(_tex1, float4(tc.xy + float2(1,1) * vTexel, 0, 0)).x;
  fMaxDepth = max(fMaxDepth, fDiagPP);

  const float fDiagNN = tex2Dlod(_tex1, float4(tc.xy - float2(1,1) * vTexel, 0, 0)).x;
  fMaxDepth = max(fMaxDepth, fDiagNN);

  const float fDiagNP = tex2Dlod(_tex1, float4(tc.xy + float2(-1,1)* vTexel, 0, 0)).x;
  fMaxDepth = max(fMaxDepth, fDiagNP);

  const float fDiagPN = tex2Dlod(_tex1, float4(tc.xy + float2(1,-1)* vTexel, 0, 0)).x;
  fMaxDepth = max(fMaxDepth, fDiagPN);

  return fMaxDepth;
}

////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////

// Offset map pass: currently a plain copy of _tex0 at IN.baseTC.
// The original also computed a velocity length and a depth-based size scale, but neither
// value was ever written to the output; that dead code has been removed.
pixout OMB_OffsetMapPS(vtxOut IN)
{
  pixout OUT = (pixout)0;

  OUT.Color = tex2D(_tex0, IN.baseTC);
  return OUT;
}

////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////

// Copies all four channels of _tex0 at IN.baseTC to the output unchanged.
// (Merging in the alpha of a second texture was disabled in the original code.)
pixout OMB_CopyAlphaIDPS(vtxOut IN)
{
  pixout OUT = (pixout)0;

  float4 cSource = tex2D(_tex0, IN.baseTC);
  OUT.Color = cSource;

  return OUT;
}

////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////

// One-dimensional velocity dilation pass.
// _tex0 = velocity (xy) / depth (z) / extra data, _tex1 = depth (via GetResampledOrigDepth),
// _tex2.x = offset scale.
// Pixels that already carry a velocity, or whose offset scale is 0, are passed through.
// Otherwise up to 8 taps at +-1..4 steps along one axis are examined in order, and the
// first tap that is closer than the centre (centre depth - tap.z > 0), has a non-zero
// velocity and a non-zero .z is copied to the output. With %_RT_SAMPLE0 the tap offsets
// are swizzled (.yx), i.e. the taps run along the other axis.
pixout OMB_VelocityDilationPS(vtxOut IN)
{
  pixout OUT = (pixout)0;

  const int nOffsets = 8;

  float2 vOffsets[ nOffsets ] =
  {
    float2(-1.0f, 0.0f),
    float2( 1.0f, 0.0f),

    float2(-2.0f, 0.0f),
    float2( 2.0f, 0.0f),

    float2(-3.0f, 0.0f),
    float2( 3.0f, 0.0f),

    float2(-4.0f, 0.0f),
    float2( 4.0f, 0.0f),
  };

  const float2 tcCenter = IN.baseTC.xy;
  const float2 vTapStep = PS_ScreenSize.zw * 2.0;

  float4 vCenter = tex2Dlod(_tex0, float4(tcCenter, 0, 0));
  float fCenterDepth = GetResampledOrigDepth( tcCenter );
  float fOffsetScale = tex2Dlod(_tex2, float4(tcCenter, 0, 0)).x;

  if( fOffsetScale == 0 || dot(vCenter.xy, vCenter.xy) )
  {
    // Inside a moving object (or dilation disabled here): keep the centre value.
    OUT.Color = float4(vCenter.xyzw);
    return OUT;
  }

  // Check edges
  float4 vDilated = 0;

#if D3D10
  [unroll]
#endif
  for(int n = 0; n < nOffsets; n++ )
  {
#if %_RT_SAMPLE0
    const float2 vTap = vOffsets[n].yx;
#else
    const float2 vTap = vOffsets[n].xy;
#endif
    float4 vCandidate = tex2Dlod(_tex0, float4(tcCenter + vTap * vTapStep, 0, 0));

    // The product is non-zero only when all three conditions hold:
    // tap is closer than the centre, tap has velocity, nothing with non-zero .z was taken yet.
    float fAccept = saturate( fCenterDepth - vCandidate.z );
    fAccept *= dot( vCandidate.xy, vCandidate.xy );
    fAccept *= vDilated.z == 0;

    if( fAccept )
    {
      vDilated = vCandidate;
    }
  }

  OUT.Color = float4(vDilated);
  return OUT;
}

////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////

// Object motion blur using the dilated velocity buffer.
// _tex0 = scene color (alpha is used as a blend/ID term), _tex1 = dilated velocity (xy).
// Pixels with zero velocity are passed through. Otherwise the velocity length is clamped
// to 48 / PS_ScreenSize per axis, scaled by PI_motionBlurParams.x, and 16 samples are taken
// along it, centred on the pixel. The averaged color is blended over the scene; the output
// alpha is the average of the accumulated per-sample alpha term so a following pass
// (%_RT_SAMPLE1) can reuse it.
pixout OMB_UsingVelocityDilationPS(vtxOut IN)
{
  pixout OUT = (pixout)0;

  const float2 tcCenter = IN.baseTC.xy;
  const float4 tcCenterLod = float4(tcCenter, 0, 0);

  float4 cScene = tex2Dlod(_tex0, tcCenterLod);
  float4 vStoredVelocity = tex2Dlod(_tex1, tcCenterLod);

  OUT.Color = cScene;
  if( dot(vStoredVelocity.xy, vStoredVelocity.xy) == 0.0 )
    return OUT;

  const int nSamples = 16;
  const float fRecipSamples = 1.0 / (float)nSamples;
  const float fRecipSamplesAcc = 1.0 / (float)(nSamples-1);

  // Split the velocity into direction and length so the length can be clamped.
  float2 vBlurDir = vStoredVelocity.xy;
  float fSpeed = length(vBlurDir.xy);
  if( fSpeed )
    vBlurDir.xy /= fSpeed;

  float2 vScrSizeRecip = 1.0 / PS_ScreenSize.xy;
  const float2 vMaxRange = 48 * vScrSizeRecip.xy;

  vBlurDir.xy *= min(fSpeed, vMaxRange) * PI_motionBlurParams.x;

  float4 vAccum = 0;   // xyz = summed color, w = summed alpha term

#if D3D10
  [unroll]
#endif
  for(float i = 0; i < nSamples; i++)
  {
    // Sample positions span exactly -0.5 .. +0.5 of the blur vector.
    const float2 tcLookup = vBlurDir * ((i * fRecipSamplesAcc) - 0.5) + tcCenter;

    float4 cTap = tex2Dlod(_tex0, float4(tcLookup.xy, 0, 0));
    vAccum.xyz += cTap.xyz;

#if !%_RT_SAMPLE1
    // First pass: any non-zero alpha counts as a full hit.
    vAccum.w += saturate(100000 * cTap.w);
#else
    // reusing previous pass blending results
    vAccum.w += cTap.w;
#endif
  }

  // Blend results with scene
  if( vAccum.w )
  {
    vAccum.xyz *= fRecipSamples;
#if !%_RT_SAMPLE1
    OUT.Color = lerp(cScene, vAccum, saturate( saturate( vAccum.w * fRecipSamples ) * 2 + saturate(cScene.w * 1000) ));
#else
    OUT.Color = lerp(cScene, vAccum, saturate( saturate( vAccum.w * fRecipSamples ) * 2 ));
#endif
  }

  OUT.Color.w = vAccum.w * fRecipSamples;

  return OUT;
}

// Camera motion blur (LDR path).
// The interpolated current / previous screen positions (see MotionBlurDisplVS) define a
// blur segment; 8 samples of the screen map are taken along it and averaged. At
// QUALITY_HIGH each sample position is pulled back towards the segment start according to
// the mask stored in the screen map alpha. The result is blended over the unblurred color
// by saturate(depth - 1), which keeps the nearest geometry (e.g. first-person hands) sharp.
pixout MotionBlurDisplPS(vtxOutMotionBlurDispl IN)
{
  pixout OUT = (pixout)0;

  int nQuality = GetShaderQuality();

  half4 cUnblurred = tex2Dproj(screenMapSampler, IN.tcProj.xyzw);
  half fDepth = GetDepthMapScaledProj(depthMapSampler, IN.tcProj.xyzw);

  OUT.Color = cUnblurred;

  float fSamples = 8.0;
  const float fWeight = (1.0 / fSamples);
  const float fWeightStep = (2.0 / fSamples);

  // Perspective divide of both positions; only the scaled copies feed the delta.
  float2 vPrevPos = ( (IN.vVelocityPrev.xy / IN.vVelocityPrev.w) ) * PI_motionBlurParams.w;

  float2 vCurrPos = (IN.vVelocity.xy / IN.vVelocity.w);
  float2 vBlurOrigin = vCurrPos;            // unscaled current position
  vCurrPos *= PI_motionBlurParams.w;

  float2 vBlurDelta = vPrevPos - vCurrPos;

  // Optional constant directional blur on top of the camera motion.
  vBlurOrigin.xy += vDirectionalBlur.xy * PI_motionBlurParams.w;
  vBlurDelta.xy += vDirectionalBlur.xy * PI_motionBlurParams.w;

  float4 cAccum = 0;

#if D3D10
  [unroll]
#endif
  for(float s = -1.0; s < 1.0 ; s += fWeightStep )
  {
    float2 tcTap = vBlurOrigin.xy - vBlurDelta.xy * s;

    if( nQuality == QUALITY_HIGH )
    {
      // Mask of 0 cancels the displacement entirely, mask of 1 leaves it untouched.
      half fDepthMask = tex2D(screenMapSampler, tcTap).w;
      tcTap += vBlurDelta.xy * (s - s * fDepthMask);
    }

    cAccum += tex2D(screenMapSampler, tcTap );
  }

  cAccum *= fWeight;

  // Remove scene bleeding from 1st player hands
  OUT.Color = lerp(cUnblurred, cAccum, saturate(fDepth - 1.0) );

  return OUT;
}


// Camera motion blur (HDR path). _tex0 = scene color with the blur mask in alpha,
// _tex1 = depth (.x, multiplied by PS_NearFarClipDist.y).
// Early outs: scaled depth <= 1 (nearby geometry) and, with %_RT_SAMPLE0, mask < 0.05.
// Sample count is 8, or 4 with %_RT_SAMPLE1.
//   %_RT_SAMPLE0 : cheap first pass - every tap is blended with the unblurred color by the
//                  tap's own mask and all taps are averaged.
//   otherwise    : only taps with a non-zero mask contribute; their position is pulled back
//                  according to the mask and the result is divided by the contributing count.
pixout MotionBlurDisplHDRPS(vtxOutMotionBlurDispl IN)
{
  pixout OUT = (pixout)0;

  int nQuality = GetShaderQuality();

  half4 cUnblurred = tex2Dproj(_tex0, IN.tcProj.xyzw);
  float fDepth = tex2Dproj(_tex1, IN.tcProj.xyzw).x * PS_NearFarClipDist.y;

  OUT.Color = cUnblurred;

  // skip below min threshold (usually sky and nearby geometry) with slow movement
#if %_RT_SAMPLE0
  const float fMinDepthMaskThreshold = 0.05;

  // Not fully correct (faraway pixels would still need sampling) but acceptable for the
  // first pass - artefacts are mostly noticeable with fast camera movement.
  if( cUnblurred.w < fMinDepthMaskThreshold )
    return OUT;
#endif

  // skip nearby geometry with fast movement
  if( fDepth - 1.0f <= 0.0f )
    return OUT;

  // Perspective divide of both positions; only the scaled copies feed the delta.
  half2 vPrevPos = ( (IN.vVelocityPrev.xy / IN.vVelocityPrev.w) ) * PI_motionBlurParams.w;

  half2 vCurrPos = (IN.vVelocity.xy / IN.vVelocity.w);
  half2 vBlurOrigin = vCurrPos;             // unscaled current position
  vCurrPos *= PI_motionBlurParams.w;

  half2 vBlurDelta = vPrevPos - vCurrPos;

  // Optional constant directional blur on top of the camera motion.
  vBlurOrigin.xy += vDirectionalBlur.xy * PI_motionBlurParams.w;
  vBlurDelta.xy += vDirectionalBlur.xy * PI_motionBlurParams.w;

  half4 cAccum = 0;

  half fSamples = 8.0;

#if %_RT_SAMPLE1
  fSamples = 4.0;
#endif

  const half fWeightStep = (2.0 / fSamples);

#if %_RT_SAMPLE0

  // use lower-quality masking for first pass
  const half fWeight = (1.0 / fSamples);

#if D3D10
  [unroll]
#endif
  for(half s = -1.0; s < 1.0 ; s += fWeightStep )
  {
    half2 tcTap = vBlurOrigin.xy - vBlurDelta.xy * s;
    half4 cTap = tex2Dlod(_tex0, float4(tcTap.xy, 0, 0) );
    cAccum += lerp(cUnblurred, cTap, cTap.w );
  }

  cAccum *= fWeight;
  OUT.Color = cAccum;

#else

  int nContributing = 0;

#if D3D10
  [unroll]
#endif
  for(half s = -1.0; s < 1.0 ; s += fWeightStep )
  {
    half2 tcTap = vBlurOrigin.xy - vBlurDelta.xy * s;
    half fDepthMask = tex2Dlod(_tex0, float4(tcTap.xy, 0, 0)).w;
    if( fDepthMask )
    {
      // Mask of 1 leaves the displacement untouched, smaller values shorten it.
      tcTap += vBlurDelta.xy * (s - s * fDepthMask);
      cAccum += tex2Dlod(_tex0, float4(tcTap.xy, 0, 0) );
      nContributing++;
    }
  }

  // Remove scene bleeding from 1st player hands: masked-out taps never contribute.
  if( nContributing )
  {
    cAccum /= (half) nContributing;
    OUT.Color = cAccum;
  }

#endif

  return OUT;
}

////////////////// technique /////////////////////

// Mask generation for camera motion blur (LDR): BaseVS + MotionBlurdDepthMaskPS.
technique MotionBlurMaskGen
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS MotionBlurdDepthMaskPS();
    CullMode = None;        
  }
}

// Mask generation for camera motion blur (HDR): BaseVS + MotionBlurDepthMaskHDRPS.
technique MotionBlurMaskGenHDR
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS MotionBlurDepthMaskHDRPS();
    CullMode = None;        
  }
}

// Camera motion blur (LDR): MotionBlurDisplVS + MotionBlurDisplPS.
technique MotionBlurDispl
{
  pass p0
  {
    VertexShader = CompileVS MotionBlurDisplVS();
    PixelShader = CompilePS MotionBlurDisplPS();
    CullMode = None;        
  }
}

// Object motion blur offset map: BaseVS + OMB_OffsetMapPS.
technique OMB_OffsetMap
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS OMB_OffsetMapPS();
    CullMode = None;        
  }
}

// Object motion blur copy pass: BaseVS + OMB_CopyAlphaIDPS.
technique OMB_CopyAlphaID
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS OMB_CopyAlphaIDPS();
    CullMode = None;        
  }
}


// The techniques below are only declared when %DYN_BRANCHING_POSTPROCESS is set
// (their pixel shaders rely on early returns / data-dependent branches).
#if %DYN_BRANCHING_POSTPROCESS

// Camera motion blur (HDR): MotionBlurDisplVS + MotionBlurDisplHDRPS.
technique MotionBlurDisplHDR
{
  pass p0
  {
    VertexShader = CompileVS MotionBlurDisplVS();//MotionBlurDisplHDRVS();
    PixelShader = CompilePS MotionBlurDisplHDRPS();
    CullMode = None;        
  }
}

// Object motion blur, brute-force variant: BaseVS + MotionBlurObjectPS.
technique MotionBlurObject
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS MotionBlurObjectPS();
    CullMode = None;      
  }
}

// Object motion blur driven by a precomputed mask: BaseVS + MotionBlurObjectUsingMaskPS.
technique MotionBlurObjectUsingMask
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS MotionBlurObjectUsingMaskPS();
    CullMode = None;      
  }
}

// Generates the object motion blur mask: BaseVS + MotionBlurObjectMaskPS.
technique MotionBlurObjectMask
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS MotionBlurObjectMaskPS();
    CullMode = None;      
  }
}

// Velocity / ID buffer rescale: BaseVS + OMB_VelocityIDRescalePS.
technique OMB_VelocityIDRescale
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS OMB_VelocityIDRescalePS();
    CullMode = None;      
  }
}

// Velocity dilation along one axis: BaseVS + OMB_VelocityDilationPS.
technique OMB_VelocityDilation
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS OMB_VelocityDilationPS();
    CullMode = None;      
  }
}

// Object motion blur using the dilated velocity buffer: BaseVS + OMB_UsingVelocityDilationPS.
technique OMB_UsingVelocityDilation
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS OMB_UsingVelocityDilationPS();
    CullMode = None;      
  }
}

#endif

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Depth of field technique ///////////////////////////////////////////////////////////////////////

/// Specific data ////////////////////////

// Compile-time feature switches for the depth of field code
// (not referenced by the functions in this part of the file - presumably used further below).
#define DOF_ANAMORPHIC_LENS 0
#define DOF_OPTICAL_SIMULATION 1
#define DOF_USE_NOISE 0

/// Constants ////////////////////////////

// Focus parameters. Component meaning depends on the function reading them:
//   GetDepthBlurinessBiased: x = near focus limit, y = far focus start, z = far blur range,
//                            w = divisor for the near blur
//   GetDepthBluriness:       x = near blur range, y = far blur range, z = focus distance
//   w also scales the final blur amount and the depth blur tap spacing.
float4 dofParamsFocus;
float4 dofParamsBlur;
// NOTE: GetDepthBlurred declares a local with the same name, which hides this global there.
float4 pixelSizes;
float radiusScale = 0.4;
float dofMinThreshold = 0.2;//0.5; // to ensure a smoother transition between near focus plane and focused area

/// Samplers /////////////////////////////

/// Samplers /////////////////////////////

///////////////// vertex shader //////////////////

///////////////// pixel shader //////////////////
// Returns a one-dimensional, weighted blur of the scaled depth around baseTC in which no
// tap may exceed the centre depth (i.e. nearer geometry bleeds outwards, farther does not).
//   _tex0      - depth texture, read through GetDepthMapScaled (hides the global _tex0 here)
//   baseTC     - centre texture coordinate
//   depthOrig  - scaled depth at the centre; has weight 1 and caps every tap
//   blurAmount - currently unused
// Seven symmetric tap pairs are taken along the diagonal PS_ScreenSize.zw * 4 * dofParamsFocus.w,
// with weights falling linearly from 7/8 to 1/8.
float GetDepthBlurred( sampler2D _tex0, half2 baseTC, float depthOrig, half blurAmount )
{
  // Distance between consecutive taps.
  half2 vTapStep = PS_ScreenSize.zw * 4.h * dofParamsFocus.w;

  float fWeightedDepth = depthOrig;
  float fWeightTotal = 1.0;

#if D3D10
  [unroll]
#endif
  for(int nTap = 1; nTap < 8; nTap++)
  {
    float fTapWeight = lerp(1, 0, (float)nTap / 8.f);

    // Average of the two taps mirrored around the centre.
    float fPairDepth = GetDepthMapScaled(_tex0, baseTC.xy + nTap * vTapStep);
    fPairDepth += GetDepthMapScaled(_tex0, baseTC.xy - nTap * vTapStep);
    fPairDepth *= 0.5;

    // Never let a tap be farther away than the centre pixel.
    if( !( fPairDepth < depthOrig ) )
      fPairDepth = depthOrig;

    fWeightedDepth += fPairDepth * fTapWeight;
    fWeightTotal += fTapWeight;
  }

  return fWeightedDepth / (fWeightTotal + 1e-6);
}

// Maps a depth to a blur amount using separate near and far focus limits.
//   depth >  dofParamsFocus.y : (depth - y) / z, clamped to [0, 1 - dofMinThreshold]
//   depth <= dofParamsFocus.x : (1 - depth / x) / w  (not clamped)
//   otherwise                 : 0 (in focus)
half GetDepthBlurinessBiased(half fDepth)
{
  // Behind the far focus limit.
  if( fDepth > (half)dofParamsFocus.y )
  {
    half fFarBlur = (fDepth - (half)dofParamsFocus.y) / (half)dofParamsFocus.z;
    return clamp(fFarBlur, 0, 1 - (half)dofMinThreshold);
  }

  // In front of the near focus limit.
  if( fDepth <= (half)dofParamsFocus.x )
  {
    half fNearBlur = (1 - fDepth / dofParamsFocus.x) / dofParamsFocus.w;
    return fNearBlur;
  }

  // Between the two limits: in focus.
  return 0;
}

// Writes the depth of field blur amount (biased focus model, no mask) remapped to [0.5 .. ]
// via blur * 0.5 + 0.5. The blur amount is the larger of the value at the pixel's own depth
// and the value at the neighbourhood-blurred depth, both scaled by dofParamsFocus.w.
// With %_RT_SAMPLE0 the scene color from _tex1 (negative values removed, channels above
// 10000 replaced by 1) goes to RGB and the blur amount to alpha; otherwise the blur amount
// is written to all channels.
pixout CopyDepthToAlphaBiasedNoMaskPS(vtxOut IN)
{
  pixout OUT;

  const float2 tcScreen = IN.baseTC.xy;

  half fRawDepth = GetDepthMap(_tex0, tcScreen);
  half fSceneDepth = fRawDepth * PS_NearFarClipDist.y;

  // Blur amount at the pixel's own depth.
  half fBlurSharp = (GetDepthBlurinessBiased(fSceneDepth)) * dofParamsFocus.w;

  // Blur amount at the blurred depth (lets near blur spread over its surroundings).
  half fNeighbourDepth = GetDepthBlurred(_tex0, tcScreen, fSceneDepth, dofParamsFocus.w).x;
  half fBlurSoft = (GetDepthBlurinessBiased(fNeighbourDepth)) * dofParamsFocus.w;

  // Keep the stronger of the two.
  half fBlur = ( fBlurSoft >= fBlurSharp ) ? fBlurSoft : fBlurSharp;
  const float fEncoded = (fBlur * 0.5 + 0.5);

#if %_RT_SAMPLE0
  half3 cScene = max( tex2D(_tex1, tcScreen).xyz, 0);

  // Same overflow guard as in the hdr pass: replace huge channel values by 1.
  cScene.rgb = (cScene.rgb > 10000.0f) ? half3(1, 1, 1) : cScene.rgb;

  OUT.Color.xyzw = half4( cScene.xyz, fEncoded );
#else
  OUT.Color = fEncoded;
#endif

  return OUT;
}

// Writes the depth of field blur amount (biased focus model, masked) remapped via
// blur * 0.5 + 0.5. Both the blur amount at the pixel's own depth and the one at the
// neighbourhood-blurred depth are multiplied by the mask in _tex1.x and by dofParamsFocus.w;
// the larger of the two is used.
// With %_RT_SAMPLE0 the scene color from _tex2 (negative values removed, channels above
// 10000 replaced by 1) goes to RGB and the blur amount to alpha; otherwise the blur amount
// is written to all channels.
pixout CopyDepthToAlphaBiasedPS(vtxOut IN)
{
  pixout OUT;

  const float2 tcScreen = IN.baseTC.xy;

  half fRawDepth = GetDepthMap(_tex0, tcScreen);
  half fMask = tex2D(_tex1, tcScreen).x;

  half fSceneDepth = fRawDepth * PS_NearFarClipDist.y;

  // Blur amount at the pixel's own depth.
  half fBlurSharp = (GetDepthBlurinessBiased(fSceneDepth) * fMask) * dofParamsFocus.w;

  // Blur amount at the blurred depth (lets near blur spread over its surroundings).
  half fNeighbourDepth = GetDepthBlurred(_tex0, tcScreen, fSceneDepth, dofParamsFocus.w).x;
  half fBlurSoft = (GetDepthBlurinessBiased(fNeighbourDepth) * fMask) * dofParamsFocus.w;

  // Keep the stronger of the two.
  half fBlur = ( fBlurSoft >= fBlurSharp ) ? fBlurSoft : fBlurSharp;
  const float fEncoded = (fBlur * 0.5 + 0.5);

#if %_RT_SAMPLE0
  half3 cScene = max( tex2D(_tex2, tcScreen).xyz, 0);

  // Same overflow guard as in the hdr pass: replace huge channel values by 1.
  cScene.rgb = (cScene.rgb > 10000.0f) ? half3(1, 1, 1) : cScene.rgb;

  OUT.Color.xyzw = half4( cScene.xyz, fEncoded );
#else
  OUT.Color = fEncoded;
#endif

  return OUT;
}

// Converts a depth value into a signed blur amount around the focus depth
// dofParamsFocus.z:
//   0        - in focus
//   1 or -1  - completely out of focus
// In front of the focus depth the offset is divided by dofParamsFocus.x and
// returned unclamped; at or behind it the offset is divided by dofParamsFocus.y
// and clamped to [0, 1 - dofMinThreshold].
half GetDepthBluriness(half fDepth)
{
  const half fFocusDepth = (half)dofParamsFocus.z;
  const half fOffset = fDepth - fFocusDepth;

  if(fDepth < fFocusDepth)
  {
    return fOffset / (half)dofParamsFocus.x;
  }

  return clamp(fOffset / (half)dofParamsFocus.y, 0, 1-(half)dofMinThreshold);
}

// Unmasked depth-to-alpha pass.
// Computes saturate(GetDepthBluriness(depth)) * dofParamsFocus.w for both the
// per-pixel depth and a blurred depth image (_tex0) and keeps the larger value.
// The factor is stored remapped as factor*0.5+0.5. With %_RT_SAMPLE0 the scene
// color from _tex1 is written to rgb and the factor to alpha; otherwise the
// factor is written to all channels.
pixout CopyDepthToAlphaNoMaskPS(vtxOut IN)
{
  pixout OUT;

  half sceneDepth = GetDepthMap(_tex0, IN.baseTC.xy);
  half depthScaled = sceneDepth.x*PS_NearFarClipDist.y;

  // Blur factor from the unblurred depth.
  half blurSharp = saturate(GetDepthBluriness(depthScaled))*dofParamsFocus.w;

  // Blur factor from a blurred depth image.
  half blurSoft = GetDepthBlurred(_tex0, IN.baseTC.xy, depthScaled, dofParamsFocus.w).x;
  blurSoft = saturate(GetDepthBluriness(blurSoft))*dofParamsFocus.w;

  // Keep the blurred result whenever it is at least as large as the sharp one.
  half blurFactor = (blurSoft >= blurSharp) ? blurSoft : blurSharp;
  half blurEncoded = (blurFactor*0.5+0.5);

#if %_RT_SAMPLE0
  half3 cScene = max( tex2D(_tex1, IN.baseTC.xy).xyz, 0);

  // Same range check as in the hdr pass: channels above 10000 are replaced by 1.
  cScene = (cScene > 10000.0f) ? half3(1, 1, 1) : cScene;

  OUT.Color.xyzw = half4( cScene.xyz, blurEncoded );
#else
  OUT.Color = blurEncoded;
#endif

  return OUT;
}

// Masked depth-to-alpha pass.
// The blur factor is saturate(GetDepthBluriness(depth)*mask + mask) * dofParamsFocus.w,
// where mask is _tex1.x. It is evaluated for the per-pixel depth and for a
// blurred depth image (_tex0) and the larger value is kept, then stored
// remapped as factor*0.5+0.5. With %_RT_SAMPLE0 the scene color from _tex2 is
// written to rgb and the factor to alpha; otherwise the factor is written to
// all channels.
pixout CopyDepthToAlphaPS(vtxOut IN)
{
  pixout OUT;

  half sceneDepth = GetDepthMap(_tex0, IN.baseTC.xy);
  half maskWeight = tex2D(_tex1, IN.baseTC.xy).x;

  half depthScaled = sceneDepth.x*PS_NearFarClipDist.y;

  // Blur factor from the unblurred depth.
  half blurSharp = saturate((GetDepthBluriness(depthScaled))*maskWeight + maskWeight)*dofParamsFocus.w;

  // Blur factor from a blurred depth image.
  half blurSoft = GetDepthBlurred(_tex0, IN.baseTC.xy, depthScaled, dofParamsFocus.w).x;
  blurSoft = saturate((GetDepthBluriness(blurSoft))*maskWeight + maskWeight)*dofParamsFocus.w;

  // Keep the blurred result whenever it is at least as large as the sharp one.
  half blurFactor = (blurSoft >= blurSharp) ? blurSoft : blurSharp;
  half blurEncoded = (blurFactor*0.5+0.5);

#if %_RT_SAMPLE0
  half3 cScene = max( tex2D(_tex2, IN.baseTC.xy).xyz, 0);

  // Same range check as in the hdr pass: channels above 10000 are replaced by 1.
  cScene = (cScene > 10000.0f) ? half3(1, 1, 1) : cScene;

  OUT.Color.xyzw = half4( cScene.xyz, blurEncoded );
#else
  OUT.Color = blurEncoded;
#endif

  return OUT;
}

// Blends three taps (high/med/low resolution) into one color using weights that
// are piecewise-linear in centerDepth.
// Returns rgb = weighted color, a = weighted sum of the three layer weights
// (the high-res weight is scaled by 16/17).
half4 DOFMergeLayers(half4 tapLow, half4 tapMed, half4 tapHigh, half centerDepth)
{
	  const half4 vSlope  = half4( -2, -4, -4, 4 );
	  const half4 vOffset = half4( 1, 3, 4, -3);

	  // x is only a helper term; y = high, z = med, w = low layer weight.
	  half4 vLayerW = saturate( centerDepth * vSlope + vOffset );
	  vLayerW.yz = min( vLayerW.yz, 1 - vLayerW.xy );

	  half4 cMerged;
	  cMerged.xyz = vLayerW.y * tapHigh.xyz + vLayerW.z * tapMed.xyz + vLayerW.w * tapLow.xyz;
	  cMerged.a = dot( vLayerW.yzw, half3(16.0f / 17.0f, 1.0f, 1.0f) );

	  return cMerged;
}

// 37-tap depth of field with per-tap aberration weights.
// Inputs read here: _tex0 (scene, sampled as the high-res tap), _tex1 (low-res
// image whose alpha supplies the center depth / blur factor), _tex2 (noise,
// only when DOF_USE_NOISE == 1).
// Returns the unmodified _tex0 sample when the center depth is below 0.01,
// otherwise the weighted average of all taps.
pixout DofHDRPS(vtxOut IN)
{
  pixout OUT;
  
  // NOTE(review): nQuality is not read anywhere else in this function.
  int nQuality = GetShaderQuality();

  const int tapCount = 37;
  // xy = poisson coordinates, z = aberration intensity
  float3 poisson[37] =
  {		
		 0.0,   0.0,  1.0,		 
		-1.0,   0.0,  1.0,
		-2.0,   0.0,  1.0,
		-3.0,   0.0,  1.5,
		 3.0,   0.0,  1.5,
		 2.0,   0.0,  1.0,
		 1.0,   0.0,  1.0, // 7
		 0.0,   1.0,  1.0,
		 0.0,   2.0,  1.0,
		 0.0,   3.0,  8.0,
	 	 0.0,  -3.0,  1.5,
		 0.0,  -2.0,  1.0,
		 0.0,  -1.0,  1.0, // 13
		-0.75,  0.75, 1.0,
		-1.75,  1.0,  1.0,
		-2.75,  1.0,  2.0,
		 2.75,  1.0,  2.0,
		 1.75,  1.0,  1.0,
		 0.75,  0.75, 1.0, // 19
		-0.75, -0.75, 1.0,
		-1.75, -1.0,  1.0,
		-2.75, -1.0,  1.5,
		 2.75, -1.0,  1.5,
		 1.75, -1.0,  1.0,
		 0.75, -0.75, 1.0, // 25
		-2.0,   2.0,  6.0,	 
		-2.0,  -2.0,  1.5,
		-1.0,  -1.75, 1.0,
		 1.0,  -1.75, 1.0,
		 2.0,  -2.0,  1.5,
		 2.0,   2.0,  6.0, // 31
		-1.0,   1.75, 1.0,
		-1.0,   2.75, 8.0,
		 1.0,   2.75, 8.0,
		-1.0,  -2.75, 1.5,
		 1.0,  -2.75, 1.5,
		 1.0,   1.75, 1.0, // 37
  };
    
  // Magnify image based on focus distance.
  //float fMagMask = tex2D(_tex0, float2(0.0, 0.0)).a*2-1;
  //if(fMagMask)
	//IN.baseTC.xy = (IN.baseTC.xy - 0.5) * lerp(0.9875f, 1.0f, saturate(dofParamsFocus.z * 0.1)) + 0.5;
  
  // Initial scene.
  OUT.Color = tex2D(_tex0, IN.baseTC.xy);

  // Fetch center tap from blurred low res image.
  float centerDepth = tex2D(_tex1, IN.baseTC.xy).a; 
  //float centerDepth = tex2D(_tex0, IN.baseTC.xy).a; 
    
  // Early out if there's no visual difference.
  if(centerDepth < 0.01)
  {		
	  return OUT;
  }
                   
  // Optional per-pixel jitter of the tap positions; stays zero when noise is disabled.
  float2 vNoise = 0.0;
  if(DOF_USE_NOISE == 1)
  {
	  // presumably PS_ScreenSize.xy is the resolution in pixels, so the noise tiles every 64 pixels - TODO confirm
	  float2 vNoiseTC = IN.baseTC.xy * PS_ScreenSize.xy / 64.0;
	  vNoise = tex2Dlod(_tex2, float4(vNoiseTC, 0, 0)) + dot(IN.baseTC.xy, 1) * 65535;
	  // Wrap to [0,1), remap to [-1,1), then scale down to +-0.05.
	  vNoise = frac( vNoise );
	  vNoise = vNoise*2-1;
	  vNoise *= 0.05;
  }
      
  // Calculate aspect ratio.
  half2 fAspectRatio = 1.0f;
  if(DOF_ANAMORPHIC_LENS == 1)
	  fAspectRatio = half2(1.333f, 2.4f) / 2.4f;

  // Calculate radius.
  half discRadius=(centerDepth*(half)dofParamsBlur.y-(half)dofParamsBlur.x);
  
  // xy scales the taps into _tex0, zw scales the taps into _tex1 (additionally multiplied by radiusScale).
  half4 texSizes = pixelSizes.xyzw * discRadius * fAspectRatio.xyxy;
  texSizes.zw *= (half)radiusScale;

  // Calculate vignette: 0 inside squared radius 0.25 around the screen center, growing outwards.
  float2 coordN = IN.baseTC.xy * 2.0 - 1.0;
  float vignette = saturate(dot(coordN, coordN)-0.25);
    
  // Rotation masks: per-axis absolute value of coordN, clamped to 1.
  float2 vignetteMask = saturate(coordN) + saturate(-coordN);
  
  // Create a rotation matrix based on screen coordinates.
  float2x2 rotationMatrixX = RotationMatrix((-coordN.x * (PI*0.5)) * vignetteMask.x);
  float2x2 rotationMatrixY = RotationMatrix(((1-IN.baseTC.y) * PI) * vignetteMask.y);
  float2x2 rotationMatrix = mul(rotationMatrixY, rotationMatrixX);

  // Go through samples and sum.
  half4 cOut = 0;
  half4 cSumWeights = 0;
  
#if D3D10
  [unroll]
#endif
  for(int t=0; t<tapCount; t++)
  { 
	  //poisson[t].xy = float2( poisson[t].x * 0.866 - poisson[t].y * 0.5, poisson[t].x * 0.5 + poisson[t].y * 0.866 ); // 30 deg rot
	  //poisson[t].xy = 0.707 * poisson[t].xy + 0.707 * float2( - poisson[t].y, poisson[t].x); // 45 deg rot

	  // Rotate poisson using matrix.
	  poisson[t].xy = mul(rotationMatrix, poisson[t].xy);
				
	  // Scale aberration outwards of the screen for aberration vignetting.
	  poisson[t].z = lerp(1.0, poisson[t].z, vignette);
		  
	  // xy = tap position in _tex0, zw = tap position in _tex1.
	  float4 tapCoord = IN.baseTC.xyxy + (poisson[t].xyxy + vNoise.xyxy) * texSizes.xyzw;

	  half4 tapHigh = tex2Dlod(_tex0, float4(tapCoord.xy, 0, 0));
	  half4 tapLow = tex2Dlod(_tex1, float4(tapCoord.zw, 0, 0));
		 	      	  
	  //half tapLerp = (tapHigh.a * 2.0 - 1.0);        
	  //half4 tap = lerp(tapHigh, tapLow, saturate(tapLerp));    
	  // The low-res tap is passed for both the low and the medium layer.
	  half4 tap = DOFMergeLayers(tapLow, tapLow, tapHigh, centerDepth);
	  // NOTE(review): tapA is not read anywhere else in this function.
	  half tapA = tap.a;
		    
	  // Apply leak reduction. Make sure only to reduce on focused areas            
	  tap.a = (tapLow.a - centerDepth + (half)dofMinThreshold > 0.0) ? 1 : saturate(tap.a * 2.0 - 1.0);    
	  //tap.a = (tapHigh.a - centerDepth + (half)dofMinThreshold > 0.0) ? 1 : saturate(tapHigh.a * 2.0 - 1.0);    
   
   
	  // High-res only.
	  //float2 tapCoord = IN.baseTC.xy + (poisson[t].xy + vNoise.xy) * texSizes.xy;
	  //half4 tap = tex2Dlod(_tex0, float4(tapCoord.xy, 0, 0));
	      
	  //tap.a = (tap.a - centerDepth + (half)dofMinThreshold > 0.0) ? 1 : saturate(tap.a * 2.0 - 1.0);    
		     
	  // Per-tap color weight; taps with aberration above 1 get their red weight halved.
	  half4 bokehColor = poisson[t].z;
	  if(bokehColor.x > 1.0)
		bokehColor *= half4(0.5, 1.0, 1.0, 1.0);
		             
	  cOut += tap.a * tap * bokehColor;
	  cSumWeights += tap.a * bokehColor;
  }
   
  // Normalize; the small constant keeps the division defined when all weights are zero.
  OUT.Color = cOut/(cSumWeights + 1e-6);
		  		
  return OUT;
}

// 8-tap depth of field.
// Inputs read here: _tex0 (scene, high-res taps), _tex1 (low-res image whose
// alpha supplies the center depth / blur factor).
// Each tap blends the high- and low-res sample by the high-res alpha, is
// squared for blending and weighted by a leak-reduction term; the weighted
// average is converted back with sqrt before it is returned.
pixout DofPS(vtxOut IN)
{
  pixout OUT;

  // NOTE(review): nQuality is not read anywhere else in this function.
  int nQuality = GetShaderQuality();

  const int tapCount = 8;

  float2 poisson[8] =
  {
       0.0,    0.0,
     0.527, -0.085,
    -0.040,  0.536,
    -0.670, -0.179,
    -0.419, -0.616,
     0.440, -0.639,
    -0.757,  0.349,
     0.574,  0.685,
  };

  half4 cOut=0;
  half discRadius;
  half discRadiusLow;
  half centerDepth;

#if D3D10
  // temporary workaround for d3d10 hlsl compiler bug
  OUT.Color = tex2D(_tex0, IN.baseTC.xy);
#endif

  // Fetch center tap from blurred low res image.
  centerDepth=tex2D(_tex1, IN.baseTC.xy).w;

  discRadius=(centerDepth*(half)dofParamsBlur.y-(half)dofParamsBlur.x);
  discRadiusLow=discRadius*(half)radiusScale;

  // Per-texture tap scale. Kept in locals instead of writing the scaled values
  // back into the global constant pixelSizes.
  const half2 vTapScaleHigh = (half2)pixelSizes.xy*discRadius;
  const half2 vTapScaleLow  = (half2)pixelSizes.zw*discRadiusLow;

#if D3D10
  [unroll]
#endif
  for(int t=0; t<tapCount; t++)
  {
    half4 tapHigh=tex2D(_tex0, IN.baseTC.xy+ poisson[t]*vTapScaleHigh);
    half4 tapLow=tex2D(_tex1, IN.baseTC.xy+ poisson[t]*vTapScaleLow);

    // Gamma correct (for linear-space blending)
    tapHigh.rgb *= tapHigh.rgb;
    tapLow.rgb *= tapLow.rgb;

    half tapLerp=(tapHigh.a*2.0-1.0);
    half4 tap=lerp(tapHigh, tapLow, saturate(tapLerp));

    // Apply leak reduction. Make sure only to reduce on focused areas
    tap.a=(tapLow.a-centerDepth+(half)dofMinThreshold>0.0)? 1: saturate(tap.a*2.0-1.0);

    cOut.xyz+=tap.a*tap.xyz;
    cOut.w+=tap.a;
  }

  // Normalize; the small constant keeps the division defined when every tap
  // weight is zero (same guard as in DofHDRPS).
  OUT.Color = cOut/(cOut.w + 1e-6);

  // Gamma correct (for linear-space blending)
  OUT.Color.rgb = sqrt(OUT.Color.rgb);

  return OUT;
}

////////////////// technique /////////////////////

// Single-pass technique: BaseVS + CopyDepthToAlphaNoMaskPS, culling disabled.
technique CopyDepthToAlphaNoMask
{
  pass p0
  {        
    CullMode = None;        

         
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS CopyDepthToAlphaNoMaskPS();    
  }
}

// Single-pass technique: BaseVS + CopyDepthToAlphaBiasedNoMaskPS, culling disabled.
technique CopyDepthToAlphaBiasedNoMask
{
  pass p0
  {        
    CullMode = None;        
            
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS CopyDepthToAlphaBiasedNoMaskPS();    

  }
}

// Single-pass technique: BaseVS + CopyDepthToAlphaPS, culling disabled.
technique CopyDepthToAlpha
{
  pass p0
  {        
    CullMode = None;        

    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS CopyDepthToAlphaPS();    

  }
}

// Single-pass technique: BaseVS + CopyDepthToAlphaBiasedPS, culling disabled.
technique CopyDepthToAlphaBiased
{
  pass p0
  {        
    CullMode = None;        
            
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS CopyDepthToAlphaBiasedPS();    

  }
}

// Single-pass technique: BaseVS + DofPS (8-tap depth of field), culling disabled.
technique DepthOfField
{
  pass p0
  {        
    CullMode = None;        
    
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS DofPS();    
  }
}

// The HDR depth-of-field technique is only declared when %DYN_BRANCHING_POSTPROCESS is set.
#if %DYN_BRANCHING_POSTPROCESS

// Single-pass technique: BaseVS + DofHDRPS (37-tap depth of field), culling disabled.
technique DepthOfFieldHDR
{
  pass p0
  {        
    CullMode = None;        
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS DofHDRPS();
  }
}

#endif

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Perspective Warp      //////////////////////////////////////////////////////////////////////////

// Vertex input of PerspectiveWarpVS.
struct a2v_perspectiveWarp
{
  float4 pos : POSITION; // xy is passed straight through as the output position and used as the warp input
};

// Output of PerspectiveWarpVS / input of PerspectiveWarpPS.
struct v2f_perspectiveWarp
{
  float4 pos : POSITION;  // float4(IN.pos.xy, 0, 1)
  float2 tex : TEXCOORDN; // warped coordinate used to sample screenMapSampler
};

// Computes warped texture coordinates that re-project the screen from a 60
// degree reference field of view to a 90 degree one (fixed 1.3333 aspect).
// The vertex position is passed through unchanged as float4(IN.pos.xy, 0, 1);
// only OUT.tex is warped. Both axes use the same formula, so the math is done
// on float2 values (x, y).
v2f_perspectiveWarp PerspectiveWarpVS( a2v_perspectiveWarp IN )
{
  v2f_perspectiveWarp OUT = (v2f_perspectiveWarp) 0;

  const float pi = 3.141592;
  const float aspectRatio = 1.3333;

  // Fields of view in degrees; the vertical one is derived from the aspect ratio.
  const float normFovX = 60.0;
  const float newFovX = 90.0;
  const float2 normFov = float2( normFovX, normFovX / aspectRatio );
  const float2 newFov = float2( newFovX, newFovX / aspectRatio );

  const float2 scale = 1.0 - ( normFov / newFov );
  const float2 ratio = ( newFov / normFov ) / pi;

  // Input position with the y axis flipped.
  float2 normalizedPos = float2( IN.pos.x, -IN.pos.y );

  // Half-FOV angle at this vertex, converted from degrees to radians.
  float2 angRad = ( normalizedPos * newFov * 0.5 ) * pi / 180.0;

  float2 warpedPos = ratio * asin( scale * normalizedPos );
  warpedPos /= cos( angRad );
  warpedPos += (1.0 - scale) * sin( angRad );

  OUT.pos = float4( IN.pos.xy, 0, 1 );

  // Remap from [-1,1] to [0,1].
  OUT.tex = warpedPos * 0.5 + 0.5;

  return OUT;
}

// Samples the screen map at the warped coordinate produced by PerspectiveWarpVS.
pixout PerspectiveWarpPS( v2f_perspectiveWarp IN )
{
  pixout OUT;

  float4 cWarped = tex2D( screenMapSampler, IN.tex.xy );
  OUT.Color = cWarped;

  return OUT;
}

// Single-pass technique: PerspectiveWarpVS + PerspectiveWarpPS, culling disabled.
technique PerspectiveWarp
{
  pass p0
  {
    VertexShader = CompileVS PerspectiveWarpVS();       
    PixelShader = CompilePS PerspectiveWarpPS();
    CullMode = None;
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Glittering techniques//////////////////////////////////////////////////////////////////////////

/// Specific data ////////////////////////

/// Constants ////////////////////////////

// As read by glitterSpriteVS: x scales the glint frequency, y is the exponent
// of the view-dependency term, w is the sprite size factor (clamped to [0, 2]).
float4 glitterParams;
// NOTE(review): not referenced by the shaders visible in this section.
float4 glitterSprParams;

// Camera basis vectors (xyz) used by glitterSpriteVS to expand sprites.
float4 camUpVector;
float4 camRightVector;

/// Samplers ////////////////////////////

// Glitter buffers sampled by glitteringPassPS, bound to sampler registers s1 and s2.
sampler2D glitterScaledMap_d2 : register(s1);
sampler2D glitterScaledMap_d4 : register(s2);

// Sprite texture sampled by glitterSpritePS; trilinear filtering, clamped addressing.
sampler2D glitterSpriteSampler = sampler_state
{
  Texture = textures/defaults/glitter_sprite.dds;
  MinFilter = LINEAR;
  MagFilter = LINEAR;
  MipFilter = LINEAR; 
  AddressU = Clamp;
  AddressV = Clamp;
};

///////////////// vertex shader //////////////////

// Vertex input of glitterSpriteVS.
struct vtxInGlitterSprite
{
  // Macro defined outside this section; it supplies the Position member read by glitterSpriteVS.
  IN_P    
  // xy = sprite corner coordinate, z = value whose square root is the attenuation distance.
  float3 baseTC : TEXCOORDN;
};

// Output of glitterSpriteVS / input of glitterSpritePS.
struct vtxOutGlitterSprite
{
  float4 HPosition  : POSITION;  // projected position (set to 0 for culled sprites)
  float3 baseTC     : TEXCOORDN; // xy = sprite texture coordinate (v flipped), z = distance attenuation
  float4 glitCol    : TEXCOORDN; // xyz = attenuation * glint term, w = NdotV
  float4 screenPos  : TEXCOORDN; // HPosToScreenTC(HPosition)
};

///////////////// vertex shader //////////////////

// Expands a glitter particle into a camera-facing sprite and computes its
// glint intensity.
// Outputs: projected position, sprite texture coordinate (v flipped) with the
// distance attenuation in baseTC.z, glint color in glitCol.xyz, NdotV in
// glitCol.w and the screen-space position for alpha masking.
vtxOutGlitterSprite glitterSpriteVS(vtxInGlitterSprite IN)
{
  vtxOutGlitterSprite OUT = (vtxOutGlitterSprite)0;

  float4 vSpritePos = float4(IN.Position.xyz, 1.0);

  // Distance from the camera position to the particle.
  float3 vFromCam = (vSpritePos.xyz-g_VS_WorldViewPos.xyz);
  float fCamDist = length(vFromCam);

  // Sprite size grows with distance; 220.0 is a hand-tweaked value.
  float fSpriteSize = (fCamDist/220.0)*clamp(glitterParams.w, 0.0, 2.0);

  // Corner offset along the camera right/up vectors, from the texture coordinates.
  float2 vCorner = fSpriteSize*2.0*(IN.baseTC.xy - 0.5);

  vSpritePos.z += fSpriteSize*1.25; // try not to intersect with terrain
  vSpritePos.xyz += (camRightVector.xyz*vCorner.x + camUpVector.xyz*vCorner.y);

  OUT.HPosition = mul(vpMatrix, vSpritePos);
  OUT.baseTC.xy = float2(IN.baseTC.x, 1-IN.baseTC.y);

  // Pseudo normal generated from the fractional part of the scaled position.
  float3 vPseudoNormal = normalize(frac(IN.Position.xyz*100.0)*2.0-1.0);

  // Distance attenuation.
  float fAttenDist = sqrt(IN.baseTC.z);
  float fAtten = saturate(1.0-fCamDist/fAttenDist);

  // View dependency.
  float3 vCamDir = normalize(-vpMatrix[2].xyz);
  float fNdotV = dot(vPseudoNormal.xyz, vCamDir.xyz);

  // Glint term: triangle wave over the distance to the camera, modulated by a
  // powered visibility term.
  float fGlintWave = abs(frac((fAttenDist-fCamDist)*0.5*glitterParams.x)*2-1);
  float fGlintVis = saturate(4*pow(fNdotV*0.5+0.5, glitterParams.y));
  float fGlint = fGlintWave*fGlintVis;

  // Final term is glint * distance attenuation.
  OUT.glitCol.xyz = fAtten*fGlint;
  OUT.glitCol.w = fNdotV;
  OUT.baseTC.z = fAtten;

  // Cull sprites that are too dim by collapsing their position.
  if(OUT.glitCol.z<0.01)
  {
    OUT.HPosition=0;
  }

  // Screen space position for alpha masking (computed after the cull above).
  OUT.screenPos = HPosToScreenTC(OUT.HPosition);

  return OUT;
}

///////////////// pixel shader //////////////////

// Used for glitter particles.
// Output rgb = sprite texture * (glint color + glint color * rainbow lookup),
// output alpha = luminance of that color. No screen-alpha masking is applied.
pixout glitterSpritePS(vtxOutGlitterSprite IN)
{
  pixout OUT;
  half4 baseColor = tex2D(glitterSpriteSampler, IN.baseTC.xy);

  // Fake chromatic aberration: the rainbow texture is looked up by NdotV (glitCol.w).
  half4 chromAb = IN.glitCol+IN.glitCol*tex2D(rainbowSampler, IN.glitCol.ww);

  half3 final=chromAb.xyz*baseColor.xyz;
  OUT.Color.xyz=final.xyz;

  half lum= dot(final.xyz, half3(0.33, 0.59, 0.11));
  OUT.Color.w= lum;

  return OUT;
}

// Glittering final pass (used if glitterGlare on).
// Adds the two glitter buffers onto the scene (_tex0). Each layer is weighted
// by its own alpha and by (1 - current color) * 2; the second layer is applied
// on top of the result of the first.
pixout glitteringPassPS(vtxOut IN)
{
  pixout OUT;

  half4 cScene = tex2D(_tex0, IN.baseTC.xy);
  half4 cGlitterD2 = tex2D(glitterScaledMap_d2, IN.baseTC.xy);
  half4 cGlitterD4 = tex2D(glitterScaledMap_d4, IN.baseTC.xy);

  cScene.xyz += cGlitterD2.w*cGlitterD2.xyz*(1-cScene.xyz)*2.0;
  cScene.xyz += cGlitterD4.w*cGlitterD4.xyz*(1-cScene.xyz)*2.0;

  OUT.Color = cScene;
  return OUT;
}

////////////////// technique /////////////////////

// Single-pass technique: glitterSpriteVS + glitterSpritePS.
// Additive blending (ONE, ONE), no depth writes, culling disabled.
technique GlitterSprites
{
  pass p0
  {
    VertexShader = CompileVS glitterSpriteVS();
    PixelShader = CompilePS glitterSpritePS();      
    
    CullMode = None;   
    SrcBlend = ONE;
    DestBlend = ONE;
    AlphaBlendEnable = true;
    ZWriteEnable = false;
  }
}

// Single-pass technique: BaseVS + glitteringPassPS, culling disabled.
technique GlitteringPass
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS glitteringPassPS();      
    CullMode = None;        
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Glow technique //////////////////////////////////////////////////////////////////////////

/// Specific data ////////////////////////

/// Constants ////////////////////////////

// As read in this section: z = bright-pass threshold term (GlowBrightPassPS),
// w = glow intensity multiplier (GlowBrightPassPS, MergeSkinAndGlowPS).
float4 glowParamsPS;

/// Samplers ////////////////////////////

// Glow render targets bound to sampler registers s1..s3.
// NOTE(review): not referenced by name in the shaders visible in this section.
sampler2D glowMap_RT1 : register(s1);
sampler2D glowMap_RT2 : register(s2);
sampler2D glowMap_RT3 : register(s3);

// Vertex input of GlowGenVS.
struct vtxInGlow
{
  // IN_P / IN_TBASE are macros defined outside this section; GlowGenVS reads
  // IN.Position and IN.baseTC, which these presumably declare - TODO confirm.
  IN_P
  IN_TBASE
  float3 CamVec    : TEXCOORD1;  
};

// Output of GlowGenVS.
struct vtxOutGlow
{
  float4 HPosition  : POSITION;   // vpMatrix * input position
  float2 baseTC       : TEXCOORD0; // input texture coordinate, passed through
  float3 CamVec       : TEXCOORD1; // input camera vector, passed through
};

// Transforms the vertex by vpMatrix and passes the texture coordinate and the
// camera vector through unchanged.
vtxOutGlow GlowGenVS(vtxInGlow IN)
{
  vtxOutGlow OUT = (vtxOutGlow)0;

  float4 vInputPos = IN.Position;

  OUT.HPosition  = mul(vpMatrix, vInputPos);
  OUT.baseTC.xy  = IN.baseTC.xy;
  OUT.CamVec.xyz = IN.CamVec.xyz;

  return OUT;
}

///////////////// pixel shader //////////////////

// Writes the saturated luminance of the scene (_tex0) to rgb and the constant
// 0.02 to alpha.
pixout SceneLuminancePassPS(vtxOut IN)
{
  pixout OUT;

  half4 cScene = tex2D( _tex0, IN.baseTC.xy );

  const float3 vLumWeights = float3(0.33, 0.59, 0.11);
  half fSceneLum = saturate( dot(cScene.xyz, vLumWeights) );

  OUT.Color = half4(fSceneLum.xxx, 0.02);

  return OUT;
}

// Bright pass for the glow: extracts the part of the scene (_tex0) above
// glowParamsPS.z, scales it by (1 - eye adjustment from _tex2) and
// glowParamsPS.w, and adds the glow from _tex1.
pixout GlowBrightPassPS(vtxOut IN)
{
  pixout OUT;

  half4 cScene = tex2D(_tex0, IN.baseTC.xy);
  half4 cGlow = tex2D(_tex1, IN.baseTC.xy);
  half4 cEyeAdjust = tex2D(_tex2, IN.baseTC.xy);

  // Soft threshold: max(c - t, 0) / (c + t).
  half4 cBright = max(cScene - glowParamsPS.z, 0.0)/(cScene+glowParamsPS.z);
  cBright *= (1 - cEyeAdjust) * glowParamsPS.w;

  OUT.Color = ( cBright + cGlow );

  return OUT;
}

////////////////////////////////////////////////////////
// Merged shader for screen-space SSS and non-HDR glow.

// Fetches one screen sample at an explicit LOD (baseTC.w) and returns its rgb
// raised to the power 2.2.
float3 SubsurfaceSample(sampler2D screenMap, float4 baseTC)
{
	float3 cFetched = tex2Dlod( screenMap, baseTC );
	float3 cPowered = pow(cFetched, 2.2f);
	return cPowered;
}

// Merged pass for screen-space subsurface scattering and non-HDR glow.
// Inputs read here: _tex0 (scene), _tex1.._tex3 (glow layers; _tex1 alpha also
// supplies the skin blur mask).
// Where the mask is non-zero, the scene color is replaced by a weighted sum of
// the center sample and three successively blurred layers (7 poisson taps
// each, 21 taps in total). The glow is added at the end in either case.
pixout MergeSkinAndGlowPS(vtxOut IN)
{
  pixout OUT = (pixout)0;

  // Scene color is the output wherever no skin blur is applied.
  half4 cScreen = tex2D(_tex0, IN.baseTC.xy);
  OUT.Color = cScreen;

  //------------------------------------------------------------------------------------
  // Glow (holograms, etc.): sum of the three glow layers scaled by glowParamsPS.w.
  //------------------------------------------------------------------------------------
  half4 cGlowLayer1 = tex2D(_tex1, IN.baseTC.xy);
  half4 cGlowLayer2 = tex2D(_tex2, IN.baseTC.xy);
  half4 cGlowLayer3 = tex2D(_tex3, IN.baseTC.xy);
  half4 cGlowSum = (cGlowLayer1 + cGlowLayer2 + cGlowLayer3) * glowParamsPS.w;

  //------------------------------------------------------------------------------------
  // Screen-space SSS, 3 layer poisson (21-tap) approximation.
  //------------------------------------------------------------------------------------
  const float2 poisson[7] =
  {
    float2( 0.527, -0.085),
    float2(-0.040,  0.536),
    float2(-0.670, -0.179),
    float2(-0.419, -0.616),
    float2( 0.440, -0.639),
    float2(-0.757,  0.349),
    float2( 0.574,  0.685),
  };

  // Per-channel weights of the four layers (center, then the three blur layers).
  const float3 cSkinWeights[4] =
  {
    float3(0.333, 0.791, 0.993),
    float3(0.231, 0.205, 0.007),
    float3(0.385, 0.004, 0.0),
    float3(0.078, 0.0, 0.0)
  };

  // Blur strength is the squared alpha of the first glow layer.
  float fBlurStrength = (cGlowLayer1.a * cGlowLayer1.a);

  // Only calculate SSS where the mask is non-zero.
  if(fBlurStrength)
  {
    int nTap;

    // Kernel size for the first blur layer.
    float2 vKernelScale = 16.0f * fBlurStrength * PS_ScreenSize.zw;

    // First layer: the center sample.
    float3 cLayer0 = pow(cScreen, 2.2f);

    // Second layer: previous layer plus 7 taps, averaged over 8.
    float3 cLayer1 = cLayer0;
#if D3D10
    [unroll]
#endif
    for(nTap = 0; nTap < 7; nTap++)
    {
      cLayer1 += SubsurfaceSample(_tex0, float4( IN.baseTC.xy + poisson[nTap] * vKernelScale, 0.0, 0.0));
    }
    cLayer1 *= 0.125f;

    // Scale kernel for next layer.
    vKernelScale *= float2(2.0, -2.0);

    // Third layer.
    float3 cLayer2 = cLayer1;
#if D3D10
    [unroll]
#endif
    for(nTap = 0; nTap < 7; nTap++)
    {
      cLayer2 += SubsurfaceSample(_tex0, float4( IN.baseTC.xy + poisson[nTap] * vKernelScale, 0.0, 0.0));
    }
    cLayer2 *= 0.125f;

    // Scale kernel for next layer.
    vKernelScale *= float2(-4.0, -4.0);

    // Fourth layer.
    float3 cLayer3 = cLayer2;
#if D3D10
    [unroll]
#endif
    for(nTap = 0; nTap < 7; nTap++)
    {
      cLayer3 += SubsurfaceSample(_tex0, float4( IN.baseTC.xy + poisson[nTap] * vKernelScale, 0.0, 0.0));
    }
    cLayer3 *= 0.125f;

    // Sum each layer with its respective weights.
    float3 cSkinDiffusion = cLayer0 * cSkinWeights[0]
                          + cLayer1 * cSkinWeights[1]
                          + cLayer2 * cSkinWeights[2]
                          + cLayer3 * cSkinWeights[3];

    // Back to gamma space.
    OUT.Color.rgb = pow(cSkinDiffusion, 1.0f/2.2f);
  }

  //------------------------------------------------------------------------------------
  // Glow output (holograms, etc.): applied at the very end.
  //------------------------------------------------------------------------------------
  OUT.Color.rgb += cGlowSum.rgb;

  return OUT;
}

////////////////// technique /////////////////////

// Single-pass technique: BaseVS + SceneLuminancePassPS, culling disabled.
technique SceneLuminancePass
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS SceneLuminancePassPS();    
    CullMode = None;        
  }
}

// Single-pass technique: BaseVS + GlowBrightPassPS, culling disabled.
technique GlowBrightPass
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS GlowBrightPassPS();    
    CullMode = None;        
  }
}

// Single-pass technique: BaseVS + MergeSkinAndGlowPS, culling disabled.
technique MergeSkinAndGlow
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS MergeSkinAndGlowPS();    
    CullMode = None;        
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// GlowScene: copies glow into backbuffer /////////////////////////////////////////////////////////

/// Specific data ////////////////////////

/// Constants ////////////////////////////

////////////////// samplers /////////////////////

/////////////////////////////////////////////////
// Copies the rgb of _tex0 to the output and forces alpha to 1.
pixout GlowScenePS(vtxOut IN)
{
  pixout OUT;

  half4 cSource = tex2D(_tex0, IN.baseTC.xy);
  half3 cGlowRGB = cSource.rgb;

  OUT.Color = float4(cGlowRGB, 1.0);

  return OUT;
}

////////////////// technique /////////////////////
// Single-pass technique: BaseVS + GlowScenePS, culling disabled.
technique GlowScene
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();            
    PixelShader = CompilePS GlowScenePS();
    CullMode = None;        
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// EncodeHDRGlow: encodes HDR glow into LDR glow texture //////////////////////////////////////////

/// Specific data ////////////////////////

/// Constants ////////////////////////////

////////////////// samplers /////////////////////

// Samples _tex0 and writes the result of EncodeRGBS applied to it.
pixout EncodeHDRtoLDRPS(vtxOut IN)
{
  pixout OUT;

  float4 cSource = tex2D( _tex0, IN.baseTC.xy);
  OUT.Color = EncodeRGBS( cSource );

  return OUT;
}

////////////////// technique /////////////////////
// Single-pass technique: BaseVS + EncodeHDRtoLDRPS, culling disabled.
technique EncodeHDRtoLDR
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();            
    PixelShader = CompilePS EncodeHDRtoLDRPS();
    CullMode = None;        
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// SunShafts technique //////////////////////////////////////////////////////////////////////////

/// Specific data ////////////////////////

/// Constants ////////////////////////////

// As read by SunShaftsGenPS: x scales the per-sample step towards the sun,
// y scales the distance falloff around the sun.
float4 PI_sunShaftsParams;
// As read by SunShaftsDisplayPS: x scales the shafts mask, y scales the shafts color.
float4 sunShaftsParams;
// Matrix and position used by SunShaftsGenVS to project the sun.
float4x4 SunShafts_ViewProj;
float4 SunShafts_SunPos;

// Output of SunShaftsGenVS / input of SunShaftsGenPS.
struct vtxOutSunShaftsGen
{
  float4 HPosition  : POSITION; 
  float2 baseTC       : TEXCOORD0;
  // xy = projected sun position before the divide, z = its w component (the divisor),
  // w = dot of the normalized sun position with row 2 of SunShafts_ViewProj.
  float4 sunPos       : TEXCOORD1;  
};

/// Samplers ////////////////////////////

// Transforms the vertex by vpMatrix, passes the texture coordinate through and
// projects the sun position for SunShaftsGenPS.
vtxOutSunShaftsGen SunShaftsGenVS(vtxIn IN)
{
  vtxOutSunShaftsGen OUT = (vtxOutSunShaftsGen)0;

  // Position in screen space.
  float4 vInputPos = IN.Position;
  OUT.HPosition = mul(vpMatrix, vInputPos);

  OUT.baseTC.xy = IN.baseTC.xy;

  // Homogeneous sun position; xy is remapped as (x + w, -y + w) * 0.5 and the
  // divide by w (stored in z) is left to the pixel shader.
  float4 vSunH = mul(SunShafts_ViewProj, SunShafts_SunPos);
  OUT.sunPos.xy = (float2(vSunH.x, -vSunH.y) + vSunH.w) * 0.5;
  OUT.sunPos.z = vSunH.w;

  // Sign/visibility term read as fSign in SunShaftsGenPS.
  OUT.sunPos.w = (dot(normalize(SunShafts_SunPos).xyz, SunShafts_ViewProj[2].xyz));

  return OUT;
}

///////////////// pixel shader //////////////////

// Builds the sun shafts mask.
// Reads depth from _tex0.r and scene color from _tex1. At high shader quality
// both are averaged over the five texture coordinates supplied by TexToTexVS,
// otherwise only the center coordinate is used.
// Output rgb = scene color * saturate(depth), alpha = 1 - depth.
pixout SunShaftsMaskGenPS(vtxOutTexToTex IN)
{
  pixout OUT;

  // Center tap, used as is for the non-high-quality path.
  half sceneDepth = tex2D(_tex0, IN.baseTC0.xy).r;
  half4 scene = tex2D(_tex1, IN.baseTC0.xy);

  if( GetShaderQuality() == QUALITY_HIGH )
  {
    // Add the four offset taps and average all five.
    sceneDepth += tex2D(_tex0, IN.baseTC1.xy).r;
    sceneDepth += tex2D(_tex0, IN.baseTC2.xy).r;
    sceneDepth += tex2D(_tex0, IN.baseTC3.xy).r;
    sceneDepth += tex2D(_tex0, IN.baseTC4.xy).r;
    sceneDepth *= 0.2;

    scene += tex2D(_tex1, IN.baseTC1.xy);
    scene += tex2D(_tex1, IN.baseTC2.xy);
    scene += tex2D(_tex1, IN.baseTC3.xy);
    scene += tex2D(_tex1, IN.baseTC4.xy);
    scene *= 0.2;
  }

  half fShaftsMask = (1 - sceneDepth);

  OUT.Color = half4( scene.xyz * saturate(sceneDepth), fShaftsMask );

  return OUT;
}

// Radial blur of _tex0 towards the projected sun position.
// Takes 8 samples along the direction to the sun with linearly decreasing
// weights, scales rgb by a distance falloff around the sun and adds a small
// alpha term that depends on the sign term from the vertex shader.
pixout SunShaftsGenPS(vtxOutSunShaftsGen IN)
{
  pixout OUT;

  float2 vSunScreenPos = ((IN.sunPos.xy / IN.sunPos.z));
  float fSign = (IN.sunPos.w);

  // Vector from this pixel to the sun.
  float2 vToSun = ( vSunScreenPos.xy - IN.baseTC.xy);

  float fAspectRatio =  1.333 * PS_ScreenSize.y /PS_ScreenSize.x;

  // Falloff with aspect-corrected distance to the sun; zero when fSign is not positive.
  float fSunFalloff = saturate( 1 - saturate(length(vToSun * float2(1, fAspectRatio))*PI_sunShaftsParams.y));
  float sunDist = saturate(fSign) * fSunFalloff;

  // Step between consecutive samples.
  float2 vStep = vToSun;
  vStep.xy *= PI_sunShaftsParams.x * fSign;

  const float numSamples = 8;
  half4 cAccum = 0;

#if D3D10
  [unroll]
#endif
  for(int i=0; i<numSamples; i++)
  {
    half4 cSample = tex2D(_tex0, (IN.baseTC.xy + vStep.xy * i) );
    cAccum += cSample * (1.0-i/numSamples);
  }

  cAccum /= numSamples;

  OUT.Color = cAccum * 2  * float4(sunDist.xxx, 1);
  OUT.Color.w += 1-saturate(fSign*0.1+0.9);

  return OUT;
}

// Composites the sun shafts (_tex1) onto the scene (_tex0).
// The shafts color is added screen-style (scaled by 1 - scene) and tinted by
// the normalized sun color, then a soft-light blend applies the shafts mask.
pixout SunShaftsDisplayPS(vtxOut IN)
{
  pixout OUT;

  // Gamma correct input colors.
  HDRGammaCorrectInputColor(g_PS_SunColor);

  half4 cScreen = tex2D(_tex0, IN.baseTC.xy);
  half4 cSunShafts = tex2D(_tex1, IN.baseTC.xy);

  // Mask strength from the inverted shafts alpha.
  half fShaftsMask = saturate(1.00001- cSunShafts.w) *sunShaftsParams.x * 2.0;

  // Normalized sun color (avoids huge values); alpha stays 1.
  float4 sunColor = float4(normalize(g_PS_SunColor.xyz), 1);

  OUT.Color =  cScreen + cSunShafts.xyzz * sunShaftsParams.y * sunColor * ( 1 - cScreen );
  OUT.Color = BlendSoftLight(OUT.Color, sunColor * fShaftsMask *0.5+0.5);

  return OUT;
}

////////////////// technique /////////////////////

// Single-pass technique: TexToTexVS + SunShaftsMaskGenPS, culling disabled.
technique SunShaftsMaskGen
{
  pass p0
  {
    VertexShader = CompileVS TexToTexVS();
    PixelShader = CompilePS SunShaftsMaskGenPS();    
    CullMode = None;        
  }
}

// Single-pass technique: SunShaftsGenVS + SunShaftsGenPS, culling disabled.
technique SunShaftsGen
{
  pass p0
  {
    VertexShader = CompileVS SunShaftsGenVS();
    PixelShader = CompilePS SunShaftsGenPS();    
    CullMode = None;        
  }
}

// Single-pass technique: BaseVS + SunShaftsDisplayPS, culling disabled.
technique SunShaftsDisplay
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS SunShaftsDisplayPS();    
    CullMode = None;        
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Depth Enhancement technique ////////////////////////////////////////////////////////////////////

/// Constants ////////////////////////////

// Parameters for the depth enhancement pass.
// NOTE(review): component meanings are not established by the code visible in this section.
float4 vDepthEnhancementParams;

///////////////// pixel shader //////////////////

pixout DepthEnhancementPS(vtxOut IN)
{
  pixout OUT;
      
  half4 cScreen = tex2D(_tex0, IN.baseTC.xy );      
  
  float2 vSamples[8] =
  {
    -float2(0, 1),
    -float2(1, 0),
    float2(0, 1),
    float2(1, 0),
    
    float2(0.5, 0.85),
    float2(0.85, 0.5),
    -float2(0.5, 0.85),
    -float2(0.85, 0.5),
  };
  
  float fDepth = GetDepthMapScaled(_tex1, IN.baseTC.xy );  
  
  
  float fDepthBlur = 0;
  fDepthBlur += max( min( ( fDepth - GetDepthMapScaled(_tex1, IN.baseTC.xy + 2 * vSamples[0] / ScrSize.xy) ), 1.0) , -1.0) ;  
  fDepthBlur += max( min( ( fDepth - GetDepthMapScaled(_tex1, IN.baseTC.xy + 2 *vSamples[1] / ScrSize.xy) ), 1.0) , -1.0) ;  
  fDepthBlur += max( min( ( fDepth - GetDepthMapScaled(_tex1, IN.baseTC.xy + 2 *vSamples[2] / ScrSize.xy) ), 1.0) , -1.0) ;  
  fDepthBlur += max( min( ( fDepth - GetDepthMapScaled(_tex1, IN.baseTC.xy + 2 *vSamples[3] / ScrSize.xy) ), 1.0) , -1.0) ;  
  
  fDepthBlur += max( min( ( fDepth - GetDepthMapScaled(_tex1, IN.baseTC.xy + 2 * vSamples[4] / ScrSize.xy) ), 1.0) , -1.0) ;  
  fDepthBlur += max( min( ( fDepth - GetDepthMapScaled(_tex1, IN.baseTC.xy + 2 *vSamples[5] / ScrSize.xy) ), 1.0) , -1.0) ;  
  fDepthBlur += max( min( ( fDepth - GetDepthMapScaled(_tex1, IN.baseTC.xy + 2 *vSamples[6] / ScrSize.xy) ), 1.0) , -1.0) ;  
  fDepthBlur += max( min( ( fDepth - GetDepthMapScaled(_tex1, IN.baseTC.xy + 2 *vSamples[7] / ScrSize.xy) ), 1.0) , -1.0) ;    
  
  fDepthBlur /= 8.0;
  
  fDepth *= PS_NearFarClipDist.y;
  //fDepthBlur *= PS_NearFarClipDist.y;    
  
  //OUT.Color = lerp(0.5, cScreen, max( 1-abs( fDepthLow - fDepth + 0.01)), 0) ); //1 - cScreen;
  
  //OUT.Color = lerp(0.5, cScreen,  1 + 0.5 * saturate( 100 * max( abs( fDepthLow - fDepth), 0) ));
  //OUT.Color =  cScreen * (1 - abs(fDepthBlur)*0.5);//lerp(dot(cScreen, float4(0.33, 0.59, 0.11, 0)), cScreen, 1.0 - fDepthBlur ); //max( min( ( fDepth - fDepthBlur ), 1.0) , -1.0) ;
  
  //OUT.Color =  lerp(0.5, cScreen, 1 + min( abs(fDepthBlur), 1.5) ); //max( min( ( fDepth - fDepthBlur ), 1.0) , -1.0) ;  
    
  OUT.Color = saturate(1- abs(fDepthBlur) )* cScreen;
  //OUT.Color = cScreen;
  
  //saturate( max( abs( fDepth - fDepthLow), 0) ); //saturate(fDepthLow > fDepth + 1.0 );
      
  return OUT;
}

////////////////// technique /////////////////////

technique DepthEnhancement
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS DepthEnhancementPS();    
    CullMode = None;        
  }
}


////////////////////////////////////////////////////////////////////////////////////////////////////
/// Chroma Shift technique /////////////////////////////////////////////////////////////////////////

/// Constants ////////////////////////////

///////////////// pixel shader //////////////////

pixout ChromaShiftPS(vtxOut IN)
{
  pixout OUT;
      
  half4 cScreen = 0;
  
  cScreen.x = tex2D(_tex0, (IN.baseTC.xy-0.5) * (1.0 - psParams[0].x) + 0.5).x;      
  cScreen.y = tex2D(_tex0, (IN.baseTC.xy-0.5) * (1.0 - psParams[0].y) + 0.5).y;      
  cScreen.z = tex2D(_tex0, (IN.baseTC.xy-0.5) * (1.0 - psParams[0].z) + 0.5).z;      
    
  OUT.Color = cScreen;

  return OUT;
}

////////////////// technique /////////////////////

technique ChromaShift
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS ChromaShiftPS();    
    CullMode = None;        
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// UnderwaterView technique /////////////////////////////////////////////////////////////////////////

/// Constants ////////////////////////////

sampler2D underwaterBumpSampler = sampler_state
{
  Texture = textures/defaults/screen_noisy_bump.dds;
  MinFilter = LINEAR;  
  MagFilter = LINEAR;
  MipFilter = LINEAR; 
  AddressU = Wrap;
  AddressV = Wrap;
};

///////////////// pixel shader //////////////////

pixout UnderwaterViewPS(vtxOut IN)
{
  pixout OUT;
    
#if D3D10	
  // temporary workaround for d3d10 hlsl compiler bug
  OUT.Color = tex2D(screenMapSampler, IN.baseTC.xy);
#endif

  float anim = frac(AnimGenParams*0.01);  
  float3 vec = normalize(float3(IN.baseTC.xy *2-1, 1));
  half4 cBumpy = tex2D(underwaterBumpSampler, IN.baseTC.xy*0.025 + anim )*2-1;
  cBumpy += tex2D(underwaterBumpSampler, IN.baseTC.yx*0.033 - anim )*2-1;
  cBumpy.xyz = normalize( cBumpy ).xyz;
      
  half4 cScreen = tex2D(screenMapSampler, IN.baseTC.xy + cBumpy.xy*0.01);
  
  OUT.Color = cScreen;

  return OUT;
}

////////////////// technique /////////////////////

technique UnderwaterView
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS UnderwaterViewPS();    
    CullMode = None;        
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// UnderwaterGodRays technique /////////////////////////////////////////////////////////////////////////

/// Constants ////////////////////////////

float4x4 vpGodMatrix  : PI_Composite; // View*Projection
float4x4 vpGodMatrixI : PB_UnProjMatrix; // invert( View * projection )

float4 CausticsAmbient  : PI_Ambient;
float4 CausticParams	  : PB_CausticsParams;  // xy: caustics distance, zw: 1 / caustics distance


float4 PI_GodRaysParamsVS;
float4 PI_GodRaysParamsPS;
float4 PI_GodRaysSunDirVS;
float4 CausticSmoothSunDir	: PB_CausticsSmoothSunDirection; 


sampler2D wavesSampler = sampler_state
{
  Texture = textures/defaults/oceanwaves_ddn.dds;
  MinFilter = LINEAR;
  MagFilter = LINEAR;
  MipFilter = LINEAR;
  AddressU = Wrap;
  AddressV = Wrap;	
};

sampler2D causticsSampler = sampler_state
{
  Texture = textures/defaults/caustics_sampler.dds;
  MinFilter = LINEAR;
  MagFilter = LINEAR;
  MipFilter = NONE;
  AddressU = Clamp;
  AddressV = Clamp;	
};

float g_fWaterLevel
<
  Position;
> = {PB_WaterLevel};

struct vtxOutGodRays
{
  float4 HPosition  : POSITION; 
  float4 baseTC    : TEXCOORDN; // zw unused
  
  float4 waveTC      : TEXCOORDN;
  float4 causticTC0  : TEXCOORDN;
  float4 causticTC1  : TEXCOORDN;
  
  float4 vPosition : TEXCOORDN;  // w unused   
};

/// Samplers ////////////////////////////

vtxOutGodRays UnderwaterGodRaysVS(vtxIn IN)
{
  vtxOutGodRays OUT = (vtxOutGodRays)0; 

  // Position in screen space.
  float4 vPos = IN.Position;
  vPos.xy = (vPos.xy *2 - 1);
  
  vPos.xy *= 1.2; // hack: make sure to cover entire screen
  
  // Increase each slice distance
  vPos.z = 0.1+ 0.88 * saturate(PI_GodRaysParamsVS.z * PI_GodRaysParamsVS.w);
  //vPos.z = 0.4+ 0. * saturate(vsParams[0].z * vsParams[0].w);
  vPos.w = 1;
  
  // Project back to world space
  vPos = mul(vpGodMatrixI, vPos );
  vPos /= vPos.w;
 
  OUT.HPosition = mul(vpGodMatrix, vPos);  
  
  OUT.baseTC.xy = IN.baseTC.xy;
  OUT.baseTC.y =  1 - OUT.baseTC.y;

  OUT.vPosition.xyz = vPos;
  OUT.vPosition.w = 1;
    
  // Generate projection matrix based on sun direction  
  float3 dirZ = CausticSmoothSunDir.xyz;
  float3 up = float3(0,0,1);
  float3 dirX = normalize(cross(up, dirZ));
  float3 dirY = normalize(cross(dirZ, dirX));

  float3x3 mLightView;
  mLightView[0] = dirX.xyz;
  mLightView[1] = dirY.xyz;
  mLightView[2] = dirZ.xyz;
   
  // Output caustics procedural texture generation 
  float2 uv = mul(mLightView, OUT.vPosition.xyz).xy*0.5;
  
  // half tiling used to avoid annoying aliasing when swimming fast
  OUT.waveTC.xy =  uv * 2 * 0.01 * 0.012 + g_VS_AnimGenParams.w * 0.06;
  OUT.waveTC.wz =  uv * 2 * 0.01 * 0.01 + g_VS_AnimGenParams.w * 0.05;

  OUT.causticTC0.xy =  uv * 0.01 * 0.5 *2+ g_VS_AnimGenParams.w * 0.1;
  OUT.causticTC0.wz =  uv.yx * 0.01 * 0.5 *2- g_VS_AnimGenParams.w * 0.11;  

  OUT.causticTC1.xy =  uv * 0.01 * 2.0 *2+ g_VS_AnimGenParams.w * 0.1;
  OUT.causticTC1.wz =  uv.yx * 0.01 * 2.0 *2- g_VS_AnimGenParams.w * 0.11;  

  return OUT;
}

///////////////// pixel shader //////////////////

pixout UnderwaterGodRaysPS(vtxOutGodRays IN)
{
  pixout OUT;
    
  half4 cScreen =  tex2D(screenMapSampler, IN.baseTC.xy);
      
  // break movement, with random patterns
  float3 wave = 0;
  wave.xy = FetchNormalMap( wavesSampler, IN.waveTC.xy).xy;                                                  // 1 tex
  wave.xy += FetchNormalMap( wavesSampler, IN.waveTC.wz).xy;                                                 // 1 tex, 1 alu

  // Normalization optimization:
  //  - Instead of using GetNormalMap everywhere, which costs 3 alu per lookup, merge both
  //  bumps together, do single normalize after  
  
  // fast normalize
  wave.xy = wave.xy - 1.0;                                                                          // 1 alu
  wave.z = sqrt(1.0 - dot(wave.xy, wave.xy));                                                       // 2 alu    

  wave *= 0.02;                                                                                     // 1 alu  

  half3 causticMapR = 0;
  causticMapR.xy = FetchNormalMap( wavesSampler, IN.causticTC0.xy + wave.xy).xy;     // 1 tex + 2 alu
  causticMapR.xy += FetchNormalMap(wavesSampler, IN.causticTC0.wz + wave.xy).xy;     // 1 tex + 3 alu
   
  // fast normalize  
  causticMapR.xy = causticMapR.xy - 1.0;                                                            // 1 alu
  causticMapR.z = sqrt(1.0 - dot(causticMapR.xy, causticMapR.xy));                                  // 2 alu    
  
  half2 causticHighFreq = 0;
  causticHighFreq = FetchNormalMap( wavesSampler, IN.causticTC1.xy + wave.xy ).xy;   // 1 tex  + 1 alu
  causticHighFreq += FetchNormalMap( wavesSampler, IN.causticTC1.wz + wave.xy ).xy;   // 1 tex  + 2 alu
  causticHighFreq = causticHighFreq * 2.0 - 2.0;                                                    // 1 alu

  causticMapR.xy += causticHighFreq;  

  // Caustics sampler contains function: abs( 1-(abs( a) + abs(b))*0.5 ), which generates nice sharp pattern  
  half3 cCaustic;
  cCaustic.x = tex2D(causticsSampler, causticMapR.xy*0.55+0.55).x;
  cCaustic.y = tex2D(causticsSampler, causticMapR.xy*0.525+0.525).x;
  cCaustic.z = tex2D(causticsSampler, causticMapR.xy*0.5+0.5).x;
  
  float slice_pos = PI_GodRaysParamsPS.z * PI_GodRaysParamsPS.w;    
  
  // sharpen up a bit
  cCaustic *= cCaustic;
  
  // add very sharp highlight
  const half cMaxHightVis = 10.0;
  half fHighlightAtten =  1;//cMaxHightVis / (CausticParams.x - IN.vPosition.z);                         // 2 alu    
  fHighlightAtten = saturate( fHighlightAtten ) * min( abs( fHighlightAtten ), 2);  
  
  half fAtten =1;// saturate( (CausticParams.x - IN.vPosition.z)*4 );                                          // 2 alu  
  
  cCaustic += pow( cCaustic, 8 );
  
  //half4 cScreen =  tex2D(screenMapSampler, IN.baseTC.xy);
  cScreen.xyz = cCaustic * PI_GodRaysParamsPS.w  * PI_GodRaysParamsPS.y * saturate( CausticParams.y  )* 0.25;
  
  
  half fDistToCam = length( WorldViewPos.xyz - IN.vPosition.xyz );                                      // 2 alu
  
  // 4 alu
    
  fAtten *= ( slice_pos );
  
  cScreen.xyz *= fAtten *fHighlightAtten;
  
  OUT.Color = cScreen;

  return OUT;
}

pixout UnderwaterGodRaysFinalPS(vtxOut IN)
{
  pixout OUT;

  half4 c0 = tex2D(screenMapSampler, IN.baseTC.xy);
  float anim = frac(AnimGenParams*0.01);  
  float3 vec = normalize(float3(IN.baseTC.xy *2-1, 1));
  half4 cBumpy = tex2D(underwaterBumpSampler, IN.baseTC.xy*0.025 + anim )*2-1;
  cBumpy += tex2D(underwaterBumpSampler, IN.baseTC.yx*0.033 - anim )*2-1;
  cBumpy.xyz = normalize( cBumpy ).xyz;
    
  half4 cScreen = tex2D(screenMapSampler, IN.baseTC.xy + cBumpy.xy*0.0125); 
  half4 cCaustics = tex2D(screenMapScaledSampler_d4, IN.baseTC.xy + cBumpy.xy*0.01);
          
  OUT.Color = cScreen + cCaustics;

  return OUT;
}
 
////////////////// technique /////////////////////

technique UnderwaterGodRays
{
  pass p0
  {
    VertexShader = CompileVS UnderwaterGodRaysVS();
    PixelShader = CompilePS UnderwaterGodRaysPS();    
    CullMode = None;        
  }
}

technique UnderwaterGodRaysFinal
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS UnderwaterGodRaysFinalPS();    
    CullMode = None;        
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Volumetric scattering technique ////////////////////////////////////////////////////////////////

/// Constants ////////////////////////////

float4 PI_volScatterParamsVS;
float4 PI_volScatterParamsPS;
float4 VolumetricScattering;  // x: tilling, y: speed
float4 VolumetricScatteringColor; 

sampler3D volumeMapSampler = sampler_state
{  
  Texture = textures/defaults/Noise3D.dds;
  MinFilter = LINEAR;
  MagFilter = LINEAR;
  MipFilter = LINEAR; 
  AddressU = Wrap;
  AddressV = Wrap;
  AddressW = Wrap;
};


struct vtxOutVolumetricScattering
{
  float4 HPosition  : POSITION; 
  float4 baseTC    : TEXCOORDN; // zw unused
  
  float4 vPosition0 : TEXCOORDN;  // w unused   
  float4 vPosition1 : TEXCOORDN;  // w unused   
};

/// Samplers ////////////////////////////

vtxOutVolumetricScattering VolumetricScatteringVS(vtxIn IN)
{
  vtxOutVolumetricScattering OUT = (vtxOutVolumetricScattering)0; 

  // Position in screen space.
  float4 vPos = IN.Position;
  vPos.xy = (vPos.xy + g_VS_ScreenSize.zw * 0.5 )*2 - 1 ; 
  
  // Increase each slice distance
  vPos.z = 0.5 + 0.5*saturate(PI_volScatterParamsVS.z * PI_volScatterParamsVS.w);;
  vPos.w = 1;
  
  // Project back to world space
  vPos = mul(vpGodMatrixI, vPos );
  vPos /= vPos.w;
 
  OUT.HPosition = mul(vpGodMatrix, vPos);  
  
  OUT.baseTC.xy = IN.baseTC.xy;
  OUT.baseTC.y =  1 - OUT.baseTC.y;
  
  vPos *= VolumetricScattering.x;
  g_VS_AnimGenParams.w *= VolumetricScattering.y;
  
  OUT.vPosition0.xyz = vPos*0.1 + g_VS_AnimGenParams.w *0.2;
  OUT.vPosition1.xyz = vPos*0.11 - g_VS_AnimGenParams.w *0.3;
    
  return OUT;
}

///////////////// pixel shader //////////////////

pixout VolumetricScatteringPS(vtxOutVolumetricScattering IN)
{
  pixout OUT;
  
  half4 cScreen;
  float fVolume = 1 - abs(tex3D(volumeMapSampler, IN.vPosition0 ).w*2-1);
  fVolume += 1 - abs(tex3D(volumeMapSampler, IN.vPosition1).w*2-1);
  fVolume *=0.5;
    
  fVolume *= fVolume;
  fVolume *= fVolume;
  fVolume *= fVolume;
  //fVolume *= fVolume;
  
  OUT.Color = fVolume * PI_volScatterParamsPS.w  * PI_volScatterParamsPS.y * CausticParams.y * VolumetricScatteringColor;

  return OUT;
}

pixout VolumetricScatteringFinalPS(vtxOut IN)
{
  pixout OUT;
  
  half4 cScreen = tex2D(screenMapSampler, IN.baseTC.xy);  
  half4 cVolume = tex2D(screenMapScaledSampler_d4, IN.baseTC.xy);

  OUT.Color = cScreen + cVolume;

  return OUT;
}

////////////////// technique /////////////////////

technique VolumetricScattering
{
  pass p0
  {
    VertexShader = CompileVS VolumetricScatteringVS();
    PixelShader = CompilePS VolumetricScatteringPS();    
    CullMode = None;        
  }
}

technique VolumetricScatteringFinal
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS VolumetricScatteringFinalPS();    
    CullMode = None;        
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Distant rain technique /////////////////////////////////////////////////////////////////////////

/// Constants ////////////////////////////

float4x4 mComposite  : PI_Composite; // View*Projection
float4x4 mUnproject  : PB_UnProjMatrix; // invert( View * projection )
float4 cRainColor;
float4 PI_RainParamsVS;
float4 PI_RainParamsPS;

struct vtxOutDistantRain
{
  float4 HPosition  : POSITION; 
  float4 vPosition : TEXCOORDN;  // w unused   
  float4 vPosition2 : TEXCOORDN;  // w unused   
  float4 tcProj     : TEXCOORDN;
};

/// Samplers ////////////////////////////

vtxOutDistantRain DistantRainVS(vtxIn IN)
{
  vtxOutDistantRain OUT = (vtxOutDistantRain)0; 

  // Position in screen space.
  float4 vPos = IN.Position;
  vPos.xy = (vPos.xy *2 - 1);
  
  vPos.xy *= 1.2; // hack: make sure to cover entire screen
  
  // Increase each slice distance
  //vPos.z = 0.1+ 0.88 * saturate(vsParams[0].z * vsParams[0].w);
  
  //vPos.z = 0.99+ 0.0025 * saturate(vsParams[0].z * vsParams[0].w);
  vPos.z = 0.005+0.99+ 0.0025 *  saturate((PI_RainParamsVS.z * PI_RainParamsVS.w ));
 
  OUT.HPosition = mul(mComposite, vPos);  
  //OUT.HPosition.z = 0;
  

  OUT.tcProj = HPosToScreenTC( OUT.HPosition );

  OUT.vPosition.xyz = vPos + PI_RainParamsVS.x * float3(0, 0, 100*g_VS_AnimGenParams.x* ((PI_RainParamsVS.w*0.5+0.5)));
  OUT.vPosition2.xyz = vPos+ PI_RainParamsVS.x * float3(0, 0, 500*g_VS_AnimGenParams.x* ((PI_RainParamsVS.w*0.5+0.5)));
  OUT.vPosition.w = 1;
  OUT.vPosition2.w = 1;
    
  // Generate projection matrix based on sun direction  
  float3 dirZ = -g_VS_SunLightDir;
  float3 up = float3(0,0,1);
  float3 dirX = normalize(cross(up, dirZ));
  float3 dirY = normalize(cross(dirZ, dirX));

  float3x3 mLightView;
  mLightView[0] = dirX.xyz;
  mLightView[1] = dirY.xyz;
  mLightView[2] = dirZ.xyz;
   
  // Output caustics procedural texture generation 
  float2 uv = OUT.vPosition.xy; //mul(mLightView, OUT.vPosition.xyz).xy*0.5;

  OUT.vPosition.w =  vPos.z;//uv * 0.01 * 0.5 *2+ g_VS_AnimGenParams.w * 0.1;


  return OUT;
}

///////////////// pixel shader //////////////////

pixout DistantRainPS(vtxOutDistantRain IN)
{
  pixout OUT;
            
  //////////////////////////////////////////////////////////////////////////////////////////////////
  // Simulate distant rain with 3 noisy layers

  OUT.Color = saturate(tex3D( volumeMapSampler, IN.vPosition.xyz*0.45*float3(1,1,0.05)*0.1).w)*0.025*4;
  OUT.Color += saturate(tex3D( volumeMapSampler, IN.vPosition.xyz*0.3*float3(1.1,2.09,0.34)*0.1).w*2-0.8)*0.05;

  // Store current value - will be used for hits look variation
  half fHitMask = OUT.Color.x;  

  OUT.Color *= 0.5;
  OUT.Color *= saturate(tex3D( volumeMapSampler, IN.vPosition2.xyz*0.245*float3(1,1,0.1)*0.0005).w*0.5+0.5);

  //////////////////////////////////////////////////////////////////////////////////////////////////
  // Compute soft-intersection coefficients with surfaces and water plane

  half fSceneDepth = GetDepthMap(depthMapSampler, IN.tcProj.xy / IN.tcProj.w);
  fSceneDepth += GetDepthMap(depthMapSampler, (IN.tcProj.xy / IN.tcProj.w) + texToTexParams0.xy);
  fSceneDepth += GetDepthMap(depthMapSampler, (IN.tcProj.xy / IN.tcProj.w) + texToTexParams0.zw);
  fSceneDepth += GetDepthMap(depthMapSampler, (IN.tcProj.xy / IN.tcProj.w) + texToTexParams1.xy);
  fSceneDepth += GetDepthMap(depthMapSampler, (IN.tcProj.xy / IN.tcProj.w) + texToTexParams1.zw);
  fSceneDepth *= PS_NearFarClipDist.y *0.2f;
  
  float fRainDepth = IN.tcProj.w; 	

 	half softIntersect = saturate( 0.25* ( fSceneDepth - fRainDepth ));
  float fWaterSoftIsec = saturate(0.25 * (IN.vPosition.w - g_fWaterLevel));

  //////////////////////////////////////////////////////////////////////////////////////////////////
  // Simulate surface hits/splashes
  
  // Compute ground and water plane intersection
  half fGroundHit = (1-saturate( 0.05* ( fSceneDepth - fRainDepth) ))*0.5;
  half fWaterHit = 1-saturate( 0.5* (IN.vPosition.w - g_fWaterLevel));

  // Sum up hits
  fGroundHit += fWaterHit;  

  // Apply hit mask to simulate water splashes
  fHitMask = saturate(saturate(fHitMask)*4-0.2);    
  fGroundHit *= fHitMask;


  OUT.Color += fGroundHit;

  // Apply soft-intersection with surfaces and water plane
  OUT.Color *=  (1-PI_RainParamsPS.w) *0.5 *softIntersect*fWaterSoftIsec * PI_RainParamsPS.y * cRainColor;

  return OUT;
}

pixout DistantRainFinalPS(vtxOut IN)
{
  pixout OUT;

  half4 cScreen = tex2D(screenMapSampler, IN.baseTC.xy);
  half4 cRain = tex2D(screenMapScaledSampler_d2, IN.baseTC.xy);
  OUT.Color = cScreen + cRain;

  return OUT;
}

////////////////// technique /////////////////////

technique DistantRain
{
  pass p0
  {
    VertexShader = CompileVS DistantRainVS();
    PixelShader = CompilePS DistantRainPS();    
    CullMode = None;        
  }
}

technique DistantRainFinal
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS DistantRainFinalPS();    
    CullMode = None;        
  }
}


////////////////////////////////////////////////////////////////////////////////////////////////////
/// Water puddles texgen technique //////////////////////////////////////////////////////////////////////////

/// Specific data ////////////////////////

/// Constants ////////////////////////////

float4 waterPuddlesParams;

///////////////// vertex shader //////////////////

struct vtxOutWaterPuddles
{
  float4 HPosition  : POSITION;
  float2 baseTC    : TEXCOORDN;
  float4 noiseTC    : TEXCOORDN;  
};

vtxOutWaterPuddles waterPuddlesVS(vtxIn IN)
{
  vtxOutWaterPuddles OUT = (vtxOutWaterPuddles)0; 

  float4 vPos = IN.Position;
  OUT.HPosition = mul(vpMatrix, vPos);  
  OUT.baseTC.xy = IN.baseTC.xy;
 
  return OUT;
}

float gaussian(float d2, float radius)
{
  return exp(-d2 / radius);
  //return saturate( 1- (d2*d2/radius) );
  
}

///////////////// pixel shader //////////////////
pixout waterPuddlesPS(vtxOutWaterPuddles IN)
{
  pixout OUT;
  
  float fvar = 0;
  //float2 vDropPos = 0.5;
  float2 vDropPos = (waterPuddlesParams.xy*2-1); //0.25 * float2(cos(AnimGenParams*4 + fvar), sin(AnimGenParams*4 + fvar));
  //(frac(AnimGenParams)>=0.5) *
  
   

   float2 offsets[4] = 
   {
      1,  0,
      -1, 0,         
      0,  1,
      0, -1,
   };

   float4 c = tex2D(_tex0,  IN.baseTC).x;
   float4 d = tex2D(_tex1,  IN.baseTC).x;
   float fDilateRatio = 1.0;
   float fSpeedFactor = 0.3333;
   float2 fPixSize = 1.0 / 256.0;
   float fDamping = 0.9985;//5;//95;//8;
      
   
   float4 l, r, t, b;
   l = tex2D(_tex0,  IN.baseTC + 1*fDilateRatio*fPixSize * offsets[0]).x;
   r = tex2D(_tex0,  IN.baseTC + 1*fDilateRatio*fPixSize * offsets[1]).x;
   t = tex2D(_tex0,  IN.baseTC + 1*fDilateRatio*fPixSize * offsets[2]).x;
   b = tex2D(_tex0,  IN.baseTC + 1*fDilateRatio*fPixSize * offsets[3]).x; 

   float fA = fSpeedFactor;
   float fB = 2.0 - 4.0 * fSpeedFactor;
  
   float sum = (r.x + l.x + t.x + b.x) * fA + fB * c.x - d.x;
   //float sum = (r.x + l.x + t.x + b.x) * 0.5 - d.x;
             
   OUT.Color = ( float4(sum.xxx, 1) *fDamping);  
   
   //(frac(AnimGenParams)>=0.5) *
   OUT.Color +=  waterPuddlesParams.w * gaussian( length(abs( frac(IN.baseTC.xy-vDropPos)*2-1 ) ) ,2.0/256.0 );// tex2D( _tex0, IN.baseTC.xy);

   //OUT.Color.xyz = 1-exp(-1.05*OUT.Color.x);
  
  return OUT;
}

pixout waterPuddlesDisplayPS(vtxOut IN)
{
  pixout OUT;

  float3 vWeights = 0;    
  vWeights.x = (tex2D( _tex0, IN.baseTC.xy ).x);
  vWeights.y = (tex2D( _tex0, IN.baseTC.xy + float2(1,0)/waterPuddlesParams.w).x);
  vWeights.z = (tex2D( _tex0, IN.baseTC.xy + float2(0,1)/waterPuddlesParams.w).x);
  
  // make it a bit sharper (maybe add a sharpening control)
  vWeights = ( vWeights *2 - 1 );
      
  float3 vNormal = float3( vWeights.x - vWeights.y, vWeights.x - vWeights.z,1);                  // 2 inst
  vNormal = normalize(vNormal.xyz);                                                              // 3 inst
 
  OUT.Color.xyz =vNormal*0.5+0.5;// tex2D( _tex0, IN.baseTC.xy);
  OUT.Color.w = vWeights.x*0.5+0.5;

  return OUT;
}
*/
#include "mastereffect_by_martymcfly.ini"
/*
////////////////// technique /////////////////////

technique WaterPuddlesGen
{
  pass p0
  {
    VertexShader = CompileVS waterPuddlesVS();
    PixelShader = CompilePS waterPuddlesPS();    
    CullMode = None;    
  }
}

technique WaterPuddlesDisplay
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS waterPuddlesDisplayPS();    
    CullMode = None;    
  }
}

#if %DYN_BRANCHING_POSTPROCESS

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Fillrate Profile technique /////////////////////////////////////////////////////////////////////////

pixout FilrateProfilePS(vtxOut IN)
{
  pixout OUT;
  
  const int nSamples = 32;
  float fRecipSamples = 1.0 / (float) nSamples ;
  
  half4 acc = 0;
#if D3D10
  [unroll]
#endif
  for(int n = 0; n < nSamples; n++)
  {
    acc += tex2D(_tex0, IN.baseTC.xy) + (frac(n * fRecipSamples*10)*2-1)*4;
  }

  OUT.Color = acc * fRecipSamples;

  return OUT;
}

////////////////// technique /////////////////////

technique FillrateProfile
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();            
    PixelShader = CompilePS FilrateProfilePS();
    CullMode = None;        
  }
}

#endif

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Grain filter technique /////////////////////////////////////////////////////////////////////////

sampler2D grainNoiseSampler = sampler_state
{
  Texture = textures/defaults/vector_noise.dds;
  MinFilter = POINT;  
  MagFilter = POINT;
  MipFilter = POINT; 
  AddressU = Wrap;
  AddressV = Wrap;
};


pixout GrainFilterPS(vtxOut IN)
{
  pixout OUT;
  
  
  half4 acc = 0;

  float2 vNoiseTC = (IN.baseTC.xy ) * (PS_ScreenSize.xy/64.0) +  (psParams[0].xy/PS_ScreenSize.xy);
  float2 vNoise = tex2D(grainNoiseSampler, vNoiseTC)+ dot(IN.baseTC.xy, 1) * 65535;
  vNoise = frac( vNoise );

  vNoise = vNoise*2-1;
  //vNoise *= 0.05;

  half4 cScreen = tex2D(screenMapSampler, IN.baseTC);

  OUT.Color = cScreen + dot(vNoise.xy, 0.5)*psParams[0].w;


  return OUT;
}

////////////////// technique /////////////////////

technique GrainFilter
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();            
    PixelShader = CompilePS GrainFilterPS();
    CullMode = None;        
  }
}

*//**//**//**//**//**//**//**//**//**//**//**//**//**/

//+++++++++++++++++++++++++++++
//external parameters, do not modify
//+++++++++++++++++++++++++++++
// NOTE(review): these globals carry no initializer (except LumCoeff and PI), so they are
// presumably filled in by the host application at runtime -- confirm before renaming any of them.
//Keyboard-controlled temporary variables (in some versions they exist in the config file). Press and hold key 1,2,3...8 together with PageUp or PageDown to modify. By default all are set to 1.0.
float4	tempF1; //0,1,2,3
float4	tempF2; //5,6,7,8
float4	tempF3; //9,0
//x=generic timer in range 0..1, period of 16777216 ms (4.6 hours), w=frame time elapsed (in seconds)
float4	Timer;
//x=Width, y=1/Width, z=ScreenScaleY, w=1/ScreenScaleY
float4	ScreenSize;
//Luma coefficients (the three weights sum to 1.0).
//GrayScale() and Luminance() further down hard-code their own, different weights instead of using this.
float3 LumCoeff = float3(0.212656, 0.715158, 0.072186);
//pi, obtained as acos(-1)
float PI = acos(-1);

//textures
// Each texture is bound to exactly one sampler declared below:
// texColor -> SamplerColor, texDepth -> SamplerDepth, texNoise -> SamplerNoise.
// NOTE(review): the contents are presumably supplied by the host (scene color, scene depth, noise pattern) -- confirm.
texture2D texColor;
texture2D texDepth;
texture2D texNoise;

// Sampler for texColor: linear min/mag/mip filtering, clamped at the texture edges,
// no sRGB conversion on read, mip range limited via MaxMipLevel=0 with no LOD bias.
sampler2D SamplerColor = sampler_state
{
	Texture   = <texColor>;
	MinFilter = LINEAR;
	MagFilter = LINEAR;
	MipFilter = LINEAR;//NONE;
	AddressU  = Clamp;
	AddressV  = Clamp;
	SRGBTexture=FALSE;
	MaxMipLevel=0;
	MipMapLodBias=0;
};

// Sampler for texDepth: linear min/mag filtering with mip filtering disabled,
// clamped at the texture edges, no sRGB conversion on read.
sampler2D SamplerDepth = sampler_state
{
	Texture = <texDepth>;
	MinFilter = LINEAR;
	MagFilter = LINEAR;
	MipFilter = NONE;
	AddressU = Clamp;
	AddressV = Clamp;
	SRGBTexture=FALSE;
	MaxMipLevel=0;
	MipMapLodBias=0;
};

// Sampler for texNoise: point (nearest) min/mag filtering with mip filtering disabled.
// Unlike the color and depth samplers it uses Wrap addressing, so coordinates outside 0..1 repeat the texture.
sampler2D SamplerNoise = sampler_state
{
	Texture   = <texNoise>;
	MinFilter = POINT;
	MagFilter = POINT;
	MipFilter = NONE;//NONE;
	AddressU  = Wrap;
	AddressV  = Wrap;
	SRGBTexture=FALSE;
	MaxMipLevel=0;
	MipMapLodBias=0;
};

// Output of VS_PostProcess: the vertex position plus one 2D texture coordinate.
struct VS_OUTPUT_POST {
	float4 vpos  : POSITION;     // position, written as (IN.pos, 1.0) by VS_PostProcess
	float2 txcoord : TEXCOORD0;  // texture coordinate copied from the input vertex
};

// Input vertex consumed by VS_PostProcess.
struct VS_INPUT_POST {
	float3 pos  : POSITION;      // vertex position; forwarded without any matrix transform
	float2 txcoord : TEXCOORD0;  // texture coordinate, forwarded unchanged
};

//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// Vertex shader
//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

// Pass-through vertex shader for the post-process passes.
// IN:      vertex with a 3-component position and a 2D texture coordinate.
// returns: the same position promoted to float4 with w = 1.0 (no transform applied)
//          and the texture coordinate copied unchanged.
VS_OUTPUT_POST VS_PostProcess(VS_INPUT_POST IN)
{
	VS_OUTPUT_POST OUT;

	OUT.vpos    = float4(IN.pos.xyz, 1.0);
	OUT.txcoord = IN.txcoord;

	return OUT;
}

//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// Functions
//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

// Weighted sum of the three color channels.
// sample:  RGB color.
// returns: 0.3*r + 0.59*g + 0.11*b, evaluated as a dot product.
float GrayScale(float3 sample)
{
	const float3 grayWeights = float3(0.3, 0.59, 0.11);
	return dot(sample, grayWeights);
}

// Hash-style pseudo-random value derived from a 2D coordinate.
// uv:      seed coordinate.
// returns: frac(sin(dot(uv, k)) * 43758.5453), routed through the same
//          abs(a + a) * 0.5 step as before so the result is unchanged.
float random(in float2 uv)
{
	float hashVal = frac(sin(dot(uv, float2(12.9898,78.233) * 2.0)) * 43758.5453);

	// Both terms are the same scalar; the sum is halved again right away.
	return abs(hashVal + hashVal) * 0.5;
}


// Weighted sum of the three color channels (weights 0.22 / 0.707 / 0.071).
// c:       RGB color.
// returns: dot product of c with the weight vector.
float Luminance( float3 c )
{
	const float3 lumaWeights = float3(0.22, 0.707, 0.071);
	return dot( c, lumaWeights );
}

// Radial darkening factor.
// coord:   texture coordinate; 0.5 is treated as the center.
// _int:    intensity multiplier.
// returns: 1.0 at the center, decreasing with the squared distance from it
//          (not clamped, so it can go below 0 for large intensities).
float vignette(float2 coord, float _int)
{
	// Remap so the center is 0 and a coordinate of 0 or 1 lands on -1 / +1.
	float2 fromCenter = (coord - 0.5) * 2.0;
	float distSquared = dot(fromCenter, fromCenter);

	return 1.0 - distSquared * _int * 0.1;
}

// Maps a depth value through -zfar*znear / (depth*(zfar - znear) - zfar).
// depth:   input depth value.
// returns: the remapped value; with depth = 0 this evaluates to znear and with depth = 1 to zfar.
// zfar and znear are not declared in this part of the file.
float linearize(float depth)
{
	float divisor = depth * (zfar - znear) - zfar;
	return -zfar * znear / divisor;
}

// Generates a repeating noise/pattern value used for dithering.
// coord:   texture coordinate.
// returns: two values built from the same pair of fractional terms, with the
//          3.25 / 3.75 weights swapped between x and y.
float2 rand(float2 coord)
{
	// Fractional terms shared by both outputs.
	float fracPartX = frac(3.0-coord.x*(ScreenSize.x/0.2));
	float fracPartY = frac(coord.y*(ScreenSize.x*ScreenSize.z/0.2));

	float2 pattern;
	pattern.x = ((fracPartX*3.25)+(fracPartY*3.75))*0.1-0.2;
	pattern.y = ((fracPartX*3.75)+(fracPartY*3.25))*0.1-0.2;

	return pattern;
}

#define fFlareAxis			0		// blur axis
// People should not change this because of changes I made to the shader (the blur runs in the y direction, so vertical flares would get no blur).
// Too lazy to adapt that, so I'll keep it here.

// Keeps only the bright parts of the color buffer.
// tex:     texture coordinate to sample SamplerColor at.
// returns: the sampled color scaled by a 0..1 weight; the weight is a smoothstep
//          over 0..0.5 of the summed per-channel excess above fFlareLuminance.
float3 BrightPass(float2 tex)
{
	float3 sceneRGB = tex2D(SamplerColor, tex).rgb;

	// Per-channel amount by which the pixel exceeds the luminance cutoff.
	float3 overCutoff = max(sceneRGB - float3(fFlareLuminance, fFlareLuminance, fFlareLuminance), 0.0);

	float passWeight = smoothstep(0.0f, 0.5, dot(overCutoff, 1.0));

	return lerp(0.0, sceneRGB, passWeight);
}

// Bright-pass sample taken at a coordinate stretched along one axis.
// axis:    0 scales the x coordinate, any other value scales y.
// tex:     texture coordinate in 0..1.
// blur:    stretch divisor; the chosen axis is divided by -blur around the
//          screen center, which also mirrors it.
// returns: BrightPass() at the remapped coordinate.
float3 AnamorphicSample(int axis, float2 tex, float blur)
{
	// Work in -1..1 so the scale is applied around the center.
	float2 centered = 2.0 * tex - 1.0;

	if (axis) centered.y /= -blur;
	else centered.x /= -blur;

	return BrightPass(0.5 * centered + 0.5);
}

// Floored modulo: x - y * floor(x / y).
// The result takes the sign of y; y = 0 divides by zero.
float mod(float x, float y)
{
	float wholeSteps = floor(x / y);
	return x - y * wholeSteps;
}

// Quintic interpolation 6t^5 - 15t^4 + 10t^3 between two edges.
// edge0, edge1: interval mapped onto 0..1 (edge0 == edge1 divides by zero).
// x:            value to remap; clamped to the interval first.
// returns:      0 at or below edge0, 1 at or above edge1, smooth in between.
float smootherstep(float edge0, float edge1, float x)
{
	float t = clamp((x - edge0)/(edge1 - edge0), 0.0, 1.0);
	float tCubed = t*t*t;
	float quadPart = t*(t*6 - 15) + 10;

	return tCubed*quadPart;
}

// Converts RGB to a (hue, chroma, value) triple.
// Based on work by Sam Hocevar and Emil Persson.
// RGB:     input color.
// returns: x = hue, y = chroma (largest channel minus smallest), z = largest channel.
float3 Hue(in float3 RGB)
{
	float Epsilon = 1e-10;

	// Order g and b; zw carry the hue offset belonging to that ordering.
	float4 P;
	if (RGB.g < RGB.b)
		P = float4(RGB.bg, -1.0, 2.0/3.0);
	else
		P = float4(RGB.gb, 0.0, -1.0/3.0);

	// Bring r into the ordering so that Q.x ends up as the largest channel.
	float4 Q;
	if (RGB.r < P.x)
		Q = float4(P.xyw, RGB.r);
	else
		Q = float4(RGB.r, P.yzx);

	float C = Q.x - min(Q.w, Q.y);
	// Epsilon keeps the division finite when the chroma is zero.
	float H = abs((Q.w - Q.y) / (6 * C + Epsilon) + Q.z);

	return float3(H, C, Q.x);
}

// Radial chromatic aberration: each color channel is sampled at a differently
// scaled coordinate, and the result darkens towards the screen edge.
// tex:        texture coordinate in 0..1.
// outOfFocus: extra weight that pushes the falloff further inwards.
// returns:    (r, g, b, 1.0) with rgb multiplied by (1 - falloff).
float4 ChromaticAberrationPass(float2 tex, float outOfFocus)
{
	float centerDist = distance(tex, float2(0.5, 0.5));
	float f = smoothstep(fBaseRadius, fFalloffRadius, centerDist + outOfFocus * centerDist);
	float3 chroma = pow(f + fvChroma, fChromaPower);

	// Scale around the screen center, one factor per channel.
	float2 centered = 2.0 * tex - 1.0;
	float2 uvRed   = (centered * chroma.r) * 0.5 + 0.5;
	float2 uvGreen = (centered * chroma.g) * 0.5 + 0.5;
	float2 uvBlue  = (centered * chroma.b) * 0.5 + 0.5;

	float3 shifted = float3(tex2D(SamplerColor, uvRed).r,
	                        tex2D(SamplerColor, uvGreen).g,
	                        tex2D(SamplerColor, uvBlue).b);

	return float4(shifted * (1.0 - f), 1.0);
}

// Chromatic aberration driven only by the out-of-focus amount.
// tex:        texture coordinate in 0..1.
// outOfFocus: scales the exponent applied to fvChroma and darkens the result.
// returns:    (r, g, b, 1.0) with rgb multiplied by (1 - outOfFocus).
float4 ChromaticAberrationFocusPass(float2 tex, float outOfFocus)
{
	float3 chroma = pow(fvChroma, CHROMA_POW * outOfFocus);

	// Scale around the screen center, one factor per channel.
	float2 centered = 2.0 * tex - 1.0;
	float2 uvRed   = (centered * chroma.r) * 0.5 + 0.5;
	float2 uvGreen = (centered * chroma.g) * 0.5 + 0.5;
	float2 uvBlue  = (centered * chroma.b) * 0.5 + 0.5;

	float3 shifted = float3(tex2D(SamplerColor, uvRed).r,
	                        tex2D(SamplerColor, uvGreen).g,
	                        tex2D(SamplerColor, uvBlue).b);

	return float4(shifted * (1.0 - outOfFocus), 1.0);
}

//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// Passes
//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

// Shape mask built from feathered dot products of (coords, scale, scale)
// against six vectors; returns a value saturated to [0, 1].
// NOTE(review): all six HS vectors hold the same value and the weights in
// `one` are 8 rather than 1 - confirm this is intentional.
float penta(float2 coords) //pentagonal shape
{
	const float scale = float(rings) - 1.5;
	const float4 P = float4(coords, float2(scale, scale));

	const float4 HS0 = float4( -20.0, -15.0, -15.0, -20.0);
	const float4 HS1 = float4( -20.0, -15.0, -15.0, -20.0);
	const float4 HS2 = float4( -20.0, -15.0, -15.0, -20.0);
	const float4 HS3 = float4( -20.0, -15.0, -15.0, -20.0);
	const float4 HS4 = float4( -20.0, -15.0, -15.0, -20.0);
	const float4 HS5 = float4( -20.0, -15.0, -15.0, -20.0);

	const float4 one = float4(8.0, 8.0, 8.0, 8.0);

	// First four distances, feathered and summed with the weights in `one`.
	float4 dist = float4(dot(P, HS0), dot(P, HS1), dot(P, HS2), dot(P, HS3));
	dist = smoothstep(-feather, feather, dist);
	float inorout = dot(dist, one);

	// Fifth distance; dist.y is also rewritten here but only dist.x is accumulated.
	dist.x = dot(P, HS4);
	dist.y = HS5.w - abs(P.z);
	dist = smoothstep(-feather, feather, dist);
	inorout += dist.x;

	return saturate(inorout);
}

// Fetches one colour-fringed sample for the depth-of-field blur.
// R, G and B are read from three different offsets around `coords`, each
// scaled by pixelsize * fringe * blur; the colour is then boosted by
// max((lum - threshold) * gain, 0) * blur. Alpha is left at 1.
float4 colorDof(float2 coords,float blur) //processing the sample
{
	const float2 pixelsize = float2(ScreenSize.y, ScreenSize.y * ScreenSize.z);

	// One offset direction per colour channel.
	const float2 dirRed   = float2(0.0, 1.0);
	const float2 dirGreen = float2(-0.866, -0.5);
	const float2 dirBlue  = float2(0.866, -0.5);

	float4 colDF = float4(1,1,1,1);
	colDF.x = tex2Dlod(SamplerColor, float4(coords + dirRed*pixelsize*fringe*blur, 0, 0)).x;
	colDF.y = tex2Dlod(SamplerColor, float4(coords + dirGreen*pixelsize*fringe*blur, 0, 0)).y;
	colDF.z = tex2Dlod(SamplerColor, float4(coords + dirBlue*pixelsize*fringe*blur, 0, 0)).z;

	// Highlight boost above the luminance threshold.
	const float lum = dot(colDF.xyz, float3(0.299,0.587,0.114));
	const float thresh = max((lum-threshold)*gain, 0.0);
	colDF.xyz += lerp(float3(0,0,0), colDF.xyz, thresh*blur);

	return colDF;
}

// Replaces the colour with a sample taken at a pseudo-random offset inside a
// box of +/- Explosion_Radius pixels around `tex`.
float3 ExplosionPass( float3 colorInput, float2 tex, float2 pixelsize )
{
	// -- pseudo random number generator --
	float sineValue;
	float cosineValue;
	sincos(dot(tex, float2(12.9898,78.233)), sineValue, cosineValue);
	const float2 noise = frac(float2(sineValue, cosineValue) * 43758.5453 + tex);

	// Corner of the sampling box; noise in [0, 1) then spans the full box.
	const float2 boxCorner = (-Explosion_Radius * pixelsize) + tex;

	colorInput.rgb = tex2D(SamplerColor, (2.0 * Explosion_Radius * pixelsize) * noise + boxCorner).rgb;
	return colorInput;
}

// Darkens edges: measures the luma difference across both diagonals, raises
// the squared magnitude to CartoonEdgeSlope and subtracts it (scaled by
// CartoonPower) from the input colour. The result is saturated to [0, 1].
float3 CartoonPass( float3 colorInput, float2 tex, float2 pixelsize )
{
	// The float4 dot with -1 in .w computes luma(sample) - previous luma in one go.
	const float4 lumaMinusPrev = float4(LumCoeff,-1.0);

	// Main diagonal: (tex - pixelsize) against (tex + pixelsize).
	const float lumaPlus = dot(LumCoeff, tex2D(SamplerColor, tex + pixelsize).rgb);
	const float diff1 = dot(lumaMinusPrev, float4(tex2D(SamplerColor, tex - pixelsize).rgb, lumaPlus));

	// Anti-diagonal: (-x, +y) against (+x, -y).
	const float lumaAnti = dot(LumCoeff, tex2D(SamplerColor, tex + float2(pixelsize.x,-pixelsize.y)).rgb);
	const float diff2 = dot(lumaMinusPrev, float4(tex2D(SamplerColor, tex + float2(-pixelsize.x,pixelsize.y)).rgb, lumaAnti));

	const float2 gradient = float2(diff1, diff2);
	const float edge = dot(gradient, gradient);

	colorInput.rgb = pow(edge,CartoonEdgeSlope) * -CartoonPower + colorInput.rgb;

	return saturate(colorInput);
}

// Unsharp-mask style sharpening: averages four offset taps, takes the
// difference from the input colour, reduces it to one value with
// SharpStrength, clamps it to +/- SharpClamp and adds it to a fresh sample at
// `tex`.
float3 SharpPass( float3 colorInput, float2 tex, float2 pixelsize )
{
	// Four rotated taps, each contributing a quarter of the blurred colour.
	const float2 tapSSE = float2(0.5 * pixelsize.x,-pixelsize.y * SharpBias);   // South South East
	const float2 tapWSW = float2(SharpBias * -pixelsize.x,0.5 * -pixelsize.y);  // West South West
	const float2 tapENE = float2(SharpBias * pixelsize.x,0.5 * pixelsize.y);    // East North East
	const float2 tapNNW = float2(0.5 * -pixelsize.x,pixelsize.y * SharpBias);   // North North West

	float3 blurred = tex2D(SamplerColor, tex + tapSSE).rgb*0.25;
	blurred += tex2D(SamplerColor, tex + tapWSW).rgb*0.25;
	blurred += tex2D(SamplerColor, tex + tapENE).rgb*0.25;
	blurred += tex2D(SamplerColor, tex + tapNNW).rgb*0.25;

	// Reduce the high-frequency difference to a single, clamped amount.
	float sharp_luma = dot(colorInput - blurred, SharpStrength);
	sharp_luma = clamp(sharp_luma, -SharpClamp, SharpClamp);

	// The base colour is re-sampled from SamplerColor rather than taken from colorInput.
	return tex2D(SamplerColor, tex).rgb + sharp_luma;
}

// Linear levels adjustment: remaps Levels_black_point..Levels_white_point
// (given on a 0-255 scale) to 0..1. The output is not clamped.
float3 LevelsPass( float3 colorInput )
{
	// These macros stay defined after the function, as in the original.
	#define black_point_float ( Levels_black_point / 255.0 )
	#define white_point_float ( 255.0 / (Levels_white_point - Levels_black_point)) 

	// colour * scale - black * scale
	return colorInput.rgb * white_point_float - (black_point_float *  white_point_float);
}

// Technicolor-style look: builds three filtered "negatives" from channel
// pairs, adds a complementary filter colour to each, multiplies the three
// results and blends that with the input by TechniAmount.
float3 TechnicolorPass( float3 colorInput )
{
	// Filter colours. These macros stay defined after the function.
	#define cyanfilter float3(0.0, 1.30, 1.0)
	#define magentafilter float3(1.0, 0.0, 1.05) 
	#define yellowfilter float3(1.6, 1.6, 0.05)

	#define redorangefilter float2(1.05, 0.620) //RG_
	#define greenfilter float2(0.30, 1.0)       //RG_
	#define magentafilter2 magentafilter.rb     //R_B

	const float3 source = colorInput.rgb;

	// Each negative is a weighted sum of two channels, scaled by 1 / (amount * power).
	const float redNeg   = dot( redorangefilter, source.rg * (1.0 / (redNegativeAmount * TechniPower)) );
	const float greenNeg = dot( greenfilter,     source.rg * (1.0 / (greenNegativeAmount * TechniPower)) );
	const float blueNeg  = dot( magentafilter2,  source.rb * (1.0 / (blueNegativeAmount * TechniPower)) );

	// The scalar negative is broadcast across all three channels of its filter.
	const float3 redLayer   = redNeg + cyanfilter;
	const float3 greenLayer = greenNeg + magentafilter;
	const float3 blueLayer  = blueNeg + yellowfilter;

	return lerp(source, redLayer * greenLayer * blueLayer, TechniAmount);
}

// DPX-style grade: applies a per-channel sigmoid curve, a colour gamma on the
// normalised colour, and a saturation change carried out between two 3x3
// matrix transforms; the result is blended with the input by DPXBlend.
float3 DPXPass(float3 InputColor){

	static float3x3 RGB =
	{
	2.67147117265996,-1.26723605786241,-0.410995602172227,
	-1.02510702934664,1.98409116241089,0.0439502493584124,
	0.0610009456429445,-0.223670750812863,1.15902104167061
	};

	static float3x3 XYZ =
	{
	0.500303383543316,0.338097573222739,0.164589779545857,
	0.257968894274758,0.676195259144706,0.0658358459823868,
	0.0234517888692628,0.1126992737203,0.866839673124201
	};

	// Fixed pre-curve contrast and gamma.
	const float preContrast = 0.1;
	const float preGamma = 1.0;

	const float3 curveSteepness = float3(DPXRed,DPXGreen,DPXBlue);
	const float3 curveCentre = float3(DPXRedC,DPXGreenC,DPXBlueC);

	float3 B = pow(InputColor.rgb, 1.0/preGamma);
	B = B * (1.0 - preContrast) + (0.5 * preContrast);

	// Sigmoid per channel, offset and rescaled with the Btemp term.
	const float3 Btemp = (1.0 / (1.0 + exp(curveSteepness / 2.0)));
	const float3 rescale = -2.0 * Btemp + 1.0;
	B = ((1.0 / (1.0 + exp(-curveSteepness * (B - curveCentre)))) / rescale) + (-Btemp / rescale);

	// Apply the colour gamma to the colour normalised by its largest channel.
	const float value = max(max(B.r, B.g), B.b);
	const float3 shaped = pow(B / value, 1.0/DPXColorGamma);

	// Saturation is adjusted between the XYZ and RGB matrix multiplies.
	float3 c0 = mul(XYZ, shaped * value);
	const float luma = dot(c0, float3(0.30, 0.59, 0.11)); //Use BT 709 instead?
	c0 = (1.0 - DPXSaturation) * luma + DPXSaturation * c0;
	c0 = mul(RGB, c0);

	return lerp(InputColor.rgb, c0, DPXBlend);
}

// Lift / gain / gamma colour correction, applied in that order using
// RGB_Lift, RGB_Gain and RGB_Gamma. The output is saturated to [0, 1].
float3 LiftGammaGainPass( float3 colorInput )
{
	// -- Lift -- (saturated before gain is applied)
	const float3 lifted = saturate(colorInput.rgb * (1.5-0.5 * RGB_Lift) + 0.5 * RGB_Lift - 0.5);

	// -- Gain --
	const float3 gained = lifted * RGB_Gain;

	// -- Gamma --
	return saturate(pow(gained, 1.0 / RGB_Gamma));
}

// Tonemap pass: defog, exposure, gamma, a luma-driven bleach blend and a
// saturation adjustment, in that order.
// Reads Defog, FogColor, Exposure, Gamma, LumCoeff, Bleach and Saturation,
// none of which are declared in this function.
// The returned colour is not clamped at the end.
float3 TonemapPass( float3 colorInput )
{
	float3 color = colorInput.rgb;

	color = saturate(color - Defog * FogColor); // Defog
	
	color *= pow(2.0f, Exposure); // Exposure
	
	color = pow(color, Gamma);    // Gamma -- roll into the first gamma correction in main.h ?
	
	float lum = dot(LumCoeff, color.rgb);
	
	float3 blend = lum.rrr; //dont use float3
	
	// Blend weight: 0 at or below lum 0.45, reaching 1 at lum 0.55.
	float L = saturate( 10.0 * (lum - 0.45) );
  	
	// result1 = 2 * color * lum; result2 = 1 - 2 * (1 - lum) * (1 - color).
	float3 result1 = 2.0f * color.rgb * blend;
	float3 result2 = 1.0f - 2.0f * (1.0f - blend) * (1.0f - color.rgb);
	
	float3 newColor = lerp(result1, result2, L);
	// NOTE(review): the right-hand side is a float3 assigned to a float, so it is
	// implicitly truncated (presumably to the first component) - confirm this is
	// the intended bleach weight before changing it.
	float A2 = Bleach * color.rgb; //why use a float for A2 here and then multiply by color.rgb (a float3)?
	float3 mixRGB = A2 * newColor;
	
	color.rgb += ((1.0f - A2) * mixRGB);
	
	// Unweighted mean of the three channels, broadcast to a float3.
	float3 middlegray = dot(color,(1.0/3.0)); //1fps slower than the original on nvidia, 2 fps faster on AMD
	
	float3 diffcolor = color - middlegray; //float 3 here
	// Divides per channel by 1 + diffcolor * Saturation; the divisor is zero where
	// diffcolor * Saturation equals -1.
	colorInput.rgb = (color + diffcolor * Saturation)/(1+(diffcolor*Saturation)); //saturation
	
	return colorInput;
}

// Vibrance: pushes each channel away from (or towards) luma by an amount that
// shrinks as the pixel's existing saturation grows, so already saturated
// pixels are changed less.
//   colorInput : input colour
// Returns the adjusted colour (not clamped).
float3 VibrancePass( float3 colorInput )
{
	// Per-channel vibrance strength. This macro stays defined after the function.
   	#define Vibrance_coeff float3(Vibrance_RGB_balance * Vibrance)

	float3 color = colorInput; //original input color

	// Luma uses the file-wide LumCoeff. The former local `lumCoeff` (different
	// case) was never read and has been removed to avoid confusion.
	float luma = dot(LumCoeff, color.rgb); //calculate luma (grey)

	float max_color = max(colorInput.r, max(colorInput.g,colorInput.b)); //Find the strongest color
	float min_color = min(colorInput.r, min(colorInput.g,colorInput.b)); //Find the weakest color

  	float color_saturation = max_color - min_color; //The difference between the two is the saturation

	// Extrapolate between luma and the original colour by
	// 1 + coeff * (1 - sign(coeff) * saturation).
   	color.rgb = lerp(luma, color.rgb, (1.0 + (Vibrance_coeff * (1.0 - (sign(Vibrance_coeff) * color_saturation)))));

 	return color; //return the result
}

// Contrast curves pass.
// Compile-time configuration:
//   Curves_mode    : 0 = apply the curve to luma only, 1 = chroma only,
//                    2 = the whole colour (luma and chroma together)
//   Curves_formula : 1..14 selects the curve evaluated on x below
//   Curves_contrast: blend factor between the input and the curved result
//                    (doubled for formula 5, halved for formula 10)
// Note that x is a float in mode 0 and a float3 in modes 1 and 2, so every
// formula has to be valid for both types.
float3 CurvesPass( float3 colorInput )
{
  float Curves_contrast_blend = Curves_contrast;


   /*-----------------------------------------------------------.
  /               Separation of Luma and Chroma                 /
  '-----------------------------------------------------------*/

  	// -- Calculate Luma and Chroma if needed --
  	#if Curves_mode != 2

    	//calculate luma (grey)
    	float luma = dot(LumCoeff, colorInput.rgb);

    	//calculate chroma
	float3 chroma = colorInput.rgb - luma;
  	#endif

  	// -- Which value to put through the contrast formula? --
  	// I name it x because makes it easier to copy-paste to Graphtoy or Wolfram Alpha or another graphing program
  	#if Curves_mode == 2
	float3 x = colorInput.rgb; //if the curve should be applied to both Luma and Chroma
	#elif Curves_mode == 1
	float3 x = chroma; //if the curve should be applied to Chroma
	x = x * 0.5 + 0.5; //adjust range of Chroma from -1 -> 1 to 0 -> 1
  	#else // Curves_mode == 0
    	float x = luma; //if the curve should be applied to Luma
  	#endif

   /*-----------------------------------------------------------.
  /                     Contrast formulas                       /
  '-----------------------------------------------------------*/

  	// -- Curve 1 --
  	#if Curves_formula == 1
    	x = sin(PI * 0.5 * x); // Sin - 721 amd fps, +vign 536 nv
    	x *= x;
    
    	//x = 0.5 - 0.5*cos(PI*x);
    	//x = 0.5 * -sin(PI * -x + (PI*0.5)) + 0.5;
  	#endif

  	// -- Curve 2 --
  	#if Curves_formula == 2
    	x = x - 0.5;  
    	x = ( x / (0.5 + abs(x)) ) + 0.5;
    
    	//x = ( (x - 0.5) / (0.5 + abs(x-0.5)) ) + 0.5;
  	#endif

  	// -- Curve 3 --
  	#if Curves_formula == 3
    	//x = smoothstep(0.0,1.0,x); //smoothstep
    	x = x*x*(3.0-2.0*x); //faster smoothstep alternative - 776 amd fps, +vign 536 nv
    	//x = x - 2.0 * (x - 1.0) * x* (x- 0.5);  //2.0 is contrast. Range is 0.0 to 2.0
  	#endif

  	// -- Curve 4 --
  	#if Curves_formula == 4
    	x = (1.0524 * exp(6.0 * x) - 1.05248) / (20.0855 + exp(6.0 * x)); //exp formula
  	#endif

  	// -- Curve 5 --
  	#if Curves_formula == 5
    	//x = 0.5 * (x + 3.0 * x * x - 2.0 * x * x * x); //a simplified catmull-rom (0,0,1,1) - btw smoothstep can also be expressed as a simplified catmull-rom using (1,0,1,0)
    	//x = (0.5 * x) + (1.5 -x) * x*x; //estrin form - faster version
    	x = x * (x * (1.5-x) + 0.5); //horner form - fastest version

    	Curves_contrast_blend = Curves_contrast * 2.0; //I multiply by two to give it a strength closer to the other curves.
  	#endif

 	// -- Curve 6 --
  	#if Curves_formula == 6
    	x = x*x*x*(x*(x*6.0 - 15.0) + 10.0); //Perlins smootherstep
  	#endif

	// -- Curve 7 --
  	#if Curves_formula == 7
    	//x = ((x-0.5) / ((0.5/(4.0/3.0)) + abs((x-0.5)*1.25))) + 0.5;
	x = x - 0.5;
	x = x / ((abs(x)*1.25) + 0.375 ) + 0.5;
	//x = ( (x-0.5) / ((abs(x-0.5)*1.25) + (0.5/(4.0/3.0))) ) + 0.5;
  	#endif

  	// -- Curve 8 --
  	#if Curves_formula == 8
    	x = (x * (x * (x * (x * (x * (x * (1.6 * x - 7.2) + 10.8) - 4.2) - 3.6) + 2.7) - 1.8) + 2.7) * x * x; //Techicolor Cinestyle - almost identical to curve 1
  	#endif

  	// -- Curve 9 --
  	#if Curves_formula == 9
    	x =  -0.5 * (x*2.0-1.0) * (abs(x*2.0-1.0)-2.0) + 0.5; //parabola
  	#endif

  	// -- Curve 10 --
  	#if Curves_formula == 10 //Half-circles

	// The helper locals are declared with the same type as x for the active mode.
    	#if Curves_mode == 0
      	float xstep = step(x,0.5);
	float xstep_shift = (xstep - 0.5);
	float shifted_x = x + xstep_shift;
   	#else
      	float3 xstep = step(x,0.5);
	float3 xstep_shift = (xstep - 0.5);
	float3 shifted_x = x + xstep_shift;
    	#endif

	x = abs(xstep - sqrt(-shifted_x * shifted_x + shifted_x) ) - xstep_shift;

  	//x = abs(step(x,0.5)-sqrt(-(x+step(x,0.5)-0.5)*(x+step(x,0.5)-0.5)+(x+step(x,0.5)-0.5)))-(step(x,0.5)-0.5); //single line version of the above
    
  	//x = 0.5 + (sign(x-0.5)) * sqrt(0.25-(x-trunc(x*2))*(x-trunc(x*2))); //worse
  
  	/* // if/else - even worse
  	if (x-0.5)
  	x = 0.5-sqrt(0.25-x*x);
  	else
  	x = 0.5+sqrt(0.25-(x-1)*(x-1));
	*/

  	//x = (abs(step(0.5,x)-clamp( 1-sqrt(1-abs(step(0.5,x)- frac(x*2%1)) * abs(step(0.5,x)- frac(x*2%1))),0 ,1))+ step(0.5,x) )*0.5; //worst so far
	
	//TODO: Check if I could use an abs split instead of step. It might be more efficient
	
	Curves_contrast_blend = Curves_contrast * 0.5; //I divide by two to give it a strength closer to the other curves.
  	#endif

  	// -- Curve 11 --
  	#if Curves_formula == 11 //Cubic catmull
    	float a = 1.00; //control point 1
    	float b = 0.00; //start point
    	float c = 1.00; //endpoint
    	float d = 0.20; //control point 2
    	x = 0.5 * ((-a + 3*b -3*c + d)*x*x*x + (2*a -5*b + 4*c - d)*x*x + (-a+c)*x + 2*b); //A customizable cubic catmull-rom spline
  	#endif

  	// -- Curve 12 --
  	#if Curves_formula == 12 //Cubic Bezier spline
    	float a = 0.00; //start point
    	float b = 0.00; //control point 1
    	float c = 1.00; //control point 2
    	float d = 1.00; //endpoint

	// NOTE(review): r, r2, r3, x2 and x3 are not read by the active formula below
	// (only by the commented-out dot() variant). They are declared float, so in
	// modes 1 and 2, where x is a float3, the initialisers are implicitly truncated.
    	float r  = (1-x);
	float r2 = r*r;
	float r3 = r2 * r;
	float x2 = x*x;
	float x3 = x2*x;
	//x = dot(float4(a,b,c,d),float4(r3,3*r2*x,3*r*x2,x3));

	//x = a * r*r*r + r * (3 * b * r * x + 3 * c * x*x) + d * x*x*x;
	//x = a*(1-x)*(1-x)*(1-x) +(1-x) * (3*b * (1-x) * x + 3 * c * x*x) + d * x*x*x;
	x = a*(1-x)*(1-x)*(1-x) + 3*b*(1-x)*(1-x)*x + 3*c*(1-x)*x*x + d*x*x*x;
  	#endif

  	// -- Curve 13 --
  	#if Curves_formula == 13 //Cubic Bezier spline - alternative implementation.
    	float3 a = float3(0.00,0.00,0.00); //start point
    	float3 b = float3(0.25,0.15,0.85); //control point 1
    	float3 c = float3(0.75,0.85,0.15); //control point 2
    	float3 d = float3(1.00,1.00,1.00); //endpoint

    	float3 ab = lerp(a,b,x);           // point between a and b
    	float3 bc = lerp(b,c,x);           // point between b and c
    	float3 cd = lerp(c,d,x);           // point between c and d
    	float3 abbc = lerp(ab,bc,x);       // point between ab and bc
    	float3 bccd = lerp(bc,cd,x);       // point between bc and cd
    	float3 dest = lerp(abbc,bccd,x);   // point on the bezier-curve
	// NOTE(review): dest is a float3; in mode 0, where x is a float, this
	// assignment is implicitly truncated.
    	x = dest;
  	#endif

  	// -- Curve 14 --
  	#if Curves_formula == 14
    	x = 1.0 / (1.0 + exp(-(x * 10.0 - 5.0))); //alternative exp formula
  	#endif

   /*-----------------------------------------------------------.
  /                 Joining of Luma and Chroma                  /
  '-----------------------------------------------------------*/

  	#if Curves_mode == 2 //Both Luma and Chroma
	float3 color = x;  //if the curve should be applied to both Luma and Chroma
	colorInput.rgb = lerp(colorInput.rgb, color, Curves_contrast_blend); //Blend by Curves_contrast

  	#elif Curves_mode == 1 //Only Chroma
	x = x * 2.0 - 1.0; //adjust the Chroma range back to -1 -> 1
	float3 color = luma + x; //Luma + Chroma
	colorInput.rgb = lerp(colorInput.rgb, color, Curves_contrast_blend); //Blend by Curves_contrast

  	#else // Curves_mode == 0 //Only Luma
    	x = lerp(luma, x, Curves_contrast_blend); //Blend by Curves_contrast
    	colorInput.rgb = x + chroma; //Luma + Chroma

  	#endif

  	//Return the result
  	return colorInput;
}

// Sepia toning: blends between a grey-weighted version of the input and the
// input tinted by ColorTone, using SepiaPower as the blend factor.
float3 SepiaPass( float3 colorInput )
{
	const float grey = dot(colorInput.rgb, LumCoeff);

	// Input tinted with the tone colour.
	const float3 tinted = colorInput.rgb * ColorTone;

	// Grey contribution plus the input divided by (GreyPower + 1).
	const float3 greyMix = (grey * GreyPower) + (colorInput.rgb / (GreyPower + 1));

	return lerp(greyMix, tinted, SepiaPower);
}

// ENB-style tonemapping with six alternative implementations; exactly which
// one is compiled in is chosen by the POSTPROCESS macro (1..6). For any other
// value the colour is returned unchanged.
// Every variant reads its own set of E*/H* tuning macros (suffix V1..V6).
// NOTE(review): variants 2, 5 and 6 divide color by normalize(color); for a
// channel that is exactly zero this looks like 0/0 - confirm inputs are
// never pure black or that the result is acceptable.
float3 SkyrimTonemapPass( float3 color )
{
	// Luma of the incoming colour; used as the "adaptation" term by variants 1, 2, 4 and 6.
	float	grayadaptation = dot(color.xyz, LumCoeff);

	#if (POSTPROCESS==1)
	color.xyz =  color.xyz / (grayadaptation * EAdaptationMaxV1 + EAdaptationMinV1);
	float cgray = dot( color.xyz, LumCoeff);
	cgray = pow(cgray, EContrastV1);
	float3 poweredcolor = pow( color.xyz, EColorSaturationV1);
	float newgray = dot(poweredcolor.xyz, LumCoeff);
	color.xyz = poweredcolor.xyz * cgray / (newgray + 0.0001);
	// `luma` is assigned here but not read afterwards in this variant.
	float3	luma =  color.xyz;
	float	lumamax = 300.0;
	color.xyz = ( color.xyz * (1.0 +  color.xyz / lumamax)) / ( color.xyz + EToneMappingCurveV1);	
	#endif

	#if (POSTPROCESS==2)
	color.xyz =  color.xyz / (grayadaptation * EAdaptationMaxV2 + EAdaptationMinV2);
	// Split into direction (xncol) and per-channel scale (scl), shape each, then recombine.
	float3 xncol = normalize( color.xyz);
	float3 scl =  color.xyz / xncol.xyz;
	scl = pow(scl, EIntensityContrastV2);
	xncol.xyz = pow(xncol.xyz, EColorSaturationV2);
	color.xyz = scl*xncol.xyz;
	float	lumamax = EToneMappingOversaturationV2;
	color.xyz = ( color.xyz * (1.0 +  color.xyz / lumamax)) / ( color.xyz + EToneMappingCurveV2);
 	color.xyz*=4;
	#endif

	#if (POSTPROCESS==3)
	color.xyz *= 35;
	float	lumamax = EToneMappingOversaturationV3;
	color.xyz = ( color.xyz * (1.0 +  color.xyz / lumamax)) / ( color.xyz + EToneMappingCurveV3);
	#endif

	#if (POSTPROCESS == 4)
	color.xyz =  color.xyz / (grayadaptation * EAdaptationMaxV4 + EAdaptationMinV4);
	// Convert to Y/U/V, shape only Y, then convert back.
	float Y = dot( color.xyz, float3(0.299, 0.587, 0.114)); //0.299 * R + 0.587 * G + 0.114 * B;
	float U = dot( color.xyz, float3(-0.14713, -0.28886, 0.436)); //-0.14713 * R - 0.28886 * G + 0.436 * B;
	float V = dot( color.xyz, float3(0.615, -0.51499, -0.10001)); //0.615 * R - 0.51499 * G - 0.10001 * B;
	Y = pow(Y, EBrightnessCurveV4);
	Y = Y * EBrightnessMultiplierV4;
	color.xyz = V * float3(1.13983, -0.58060, 0.0) + U * float3(0.0, -0.39465, 2.03211) + Y;
	color.xyz = max( color.xyz, 0.0);
	color.xyz =  color.xyz / ( color.xyz + EBrightnessToneMappingCurveV4);
	#endif

	#if (POSTPROCESS == 5)
	// hnd is fixed at 1, so the lerp below always yields hndtweak.y (1.5).
	float hnd = 1;
	float2 hndtweak = float2( 3.1 , 1.5 );
        color.xyz *= lerp( hndtweak.x, hndtweak.y, hnd );
	float3 xncol = normalize( color.xyz);
	float3 scl =  color.xyz/xncol.xyz;
	scl = pow(scl, EIntensityContrastV5);
	xncol.xyz = pow(xncol.xyz, EColorSaturationV5);
	color.xyz = scl*xncol.xyz;
	color.xyz *= HCompensateSatV5; // compensate for darkening caused my EcolorSat above
	color.xyz =  color.xyz / ( color.xyz + EToneMappingCurveV5);
	color.xyz *= 4;
	#endif

	#if (POSTPROCESS==6)
	//Postprocessing V6 by Kermles
	//tuned by the master himself for ME 1.4, thanks man!!!
	//hd6/ppv2///////////////////////////////////////////
	// Only the *Day parameter set is used here.
	float 	EIntensityContrastV6 = EIntensityContrastV6Day;
	float 	EColorSaturationV6 = EColorSaturationV6Day;
	float 	HCompensateSatV6 = HCompensateSatV6Day;
	float 	EToneMappingCurveV6 = EToneMappingCurveV6Day;
	float 	EBrightnessV6 = EBrightnessV6Day;
	float 	EToneMappingOversaturationV6 = EToneMappingOversaturationV6Day;
	float 	EAdaptationMaxV6 = EAdaptationMaxV6Day;
	float 	EAdaptationMinV6 = EAdaptationMinV6Day;
	float	lumamax = EToneMappingOversaturationV6;
	//kermles////////////////////////////////////////////
	// ncolor is declared but not used in this variant.
	float4 	ncolor;					//temporary variable for color adjustments		
	//begin pp code/////////////////////////////////////////////////
	//ppv2 modified by kermles//////////////////////////////////////
		
	grayadaptation = clamp(grayadaptation, 0, 50);
	color.xyz *= EBrightnessV6;
	float3 xncol = normalize( color.xyz);
	float3 scl =  color.xyz/xncol.xyz;
	scl = pow(scl, EIntensityContrastV6);
	xncol.xyz = pow(xncol.xyz, EColorSaturationV6);
	color.xyz = scl*xncol.xyz;
	color.xyz *= HCompensateSatV6;
	color.xyz = ( color.xyz * (1.0 +  color.xyz/lumamax))/( color.xyz + EToneMappingCurveV6);
	// Adaptation is applied after the first tonemap, before the same steps run a second time.
	color.xyz /= grayadaptation*EAdaptationMaxV6+EAdaptationMinV6;
	//rerun ppv2////////////////////////////////////////////////////
	color.xyz *= EBrightnessV6;
	xncol = normalize( color.xyz);
	scl =  color.xyz/xncol.xyz;
	scl = pow(scl, EIntensityContrastV6);
	xncol.xyz = pow(xncol.xyz, EColorSaturationV6);
	color.xyz = scl*xncol.xyz;
	color.xyz *= HCompensateSatV6;
	color.xyz = ( color.xyz * (1.0 +  color.xyz/lumamax))/( color.xyz + EToneMappingCurveV6);
	#endif

	return color;

}

// Mood tint: builds a tint from (moodR, moodG, moodB) that fades to black in
// the shadows and to white in the highlights, then blends the input towards
// it by saturate(brightness * fRatio). The result is clamped to be >= 0.
float3 MoodPass( float3 colorInput )
{
	// Unweighted mean of the three channels.
	const float fLum = ( colorInput.r + colorInput.g + colorInput.b ) / 3;

	float3 tint = float3(moodR, moodG, moodB);
	tint = lerp(0, tint, saturate(fLum * 2.0));          // black -> tint over the lower half
	tint = lerp(tint, 1, saturate(fLum - 0.5) * 2.0);    // tint -> white over the upper half

	const float3 mixed = lerp(colorInput, tint, saturate(fLum * fRatio));
	return max(0, mixed);
}

// Cross-processing look: adjusts saturation, contrast and brightness, then
// applies a fixed per-channel scale and offset and blends that with the
// adjusted colour by CrossAmount.
float3 CrossPass(float3 color)
{
	// Per-channel (r, g, b) scale and offset of the cross-process curve.
	const float3 channelScale  = float3(1.03, 1.09, 0.78);
	const float3 channelOffset = float3(0.04, 0.01, 0.13);

	const float gray = dot(float3(0.5,0.5,0.5), color);

	float3 graded = lerp(gray, color, CrossSaturation);
	graded = lerp(0.35, graded, CrossContrast);
	graded += CrossBrightness;

	float3 crossed;
	crossed.r = graded.r * channelScale.r + channelOffset.r;
	crossed.g = graded.g * channelScale.g + channelOffset.g;
	crossed.b = graded.b * channelScale.b + channelOffset.b;

	return lerp(graded, crossed, CrossAmount);
}

// Film emulation pass.
// Steps: linearise and apply contrast -> build a luma-based per-channel
// sigmoid mask (with bleach and per-channel gamma) -> overlay-style blend of
// the colour with that mask -> base sigmoid curve -> saturation/fade channel
// mix -> luma-driven overlay -> undo the linearisation -> blend by Strenght.
float3 FilmPass(float3 B)
{
	// Linearise and pull contrast towards a 0.01 floor.
	const float3 floorLevel = 0.01;
	B = pow(abs(B), Linearization);
	B = lerp(floorLevel, B, Contrast);

	// Mask starts as luma broadcast to all channels (taken before the base gamma).
	float3 D = dot(B.rgb, LumCoeff);

	B = pow(abs(B), 1.0 / BaseGamma);

	// Sigmoid steepness per channel and for the base curve.
	const float3 curveRGB = float3(FRedCurve, FGreenCurve, FBlueCurve);
	const float d = BaseCurve;

	// Value of each sigmoid term at the low end, used to renormalise it.
	const float3 lowRGB = 1.0 / (1.0 + exp(curveRGB / 2.0));
	const float v = 1.0 / (1.0 + exp(d / 2.0));

	// Per-channel sigmoid on the mask, then gamma, bleach and channel gammas.
	D = (1.0 / (1.0 + exp(-curveRGB * (D - 0.5))) - lowRGB) / (1.0 - 2.0 * lowRGB);
	D = pow(abs(D), 1.0 / EffectGamma);
	D = lerp(D, 1.0 - D, FBleach);
	D.r = pow(abs(D.r), 1.0 / EffectGammaR);
	D.g = pow(abs(D.g), 1.0 / EffectGammaG);
	D.b = pow(abs(D.b), 1.0 / EffectGammaB);

	// Overlay-style blend of B with the mask, chosen per channel at mask 0.5.
	const float3 weight = 2.0 * D - 1.0;
	const float3 darkSide  = weight * (B - B * B) + B;
	const float3 lightSide = weight * (sqrt(B) - B) + B;

	float3 C;
	C.r = (D.r < 0.5) ? darkSide.r : lightSide.r;
	C.g = (D.g < 0.5) ? darkSide.g : lightSide.g;
	C.b = (D.b < 0.5) ? darkSide.b : lightSide.b;

	float3 F = lerp(B, C, Strenght);

	// Base curve.
	F = (1.0 / (1.0 + exp(-d * (F - 0.5))) - v) / (1.0 - 2.0 * v);

	// Channel mix: each row holds the (r, g, b) weights for one output channel.
	const float3 toRed   = float3(1.0 - FSaturation, 0.0 + FSaturation, 0.0 + FSaturation);
	const float3 toGreen = float3(0.0 + FSaturation, (1.0 - Fade) - FSaturation, (0.0 + Fade) + FSaturation);
	const float3 toBlue  = float3(0.0 + FSaturation, (0.0 + Fade) + FSaturation, (1.0 - Fade) - FSaturation);

	const float3 iF = F;
	F.r = (iF.r * toRed.x   + iF.g * toRed.y   + iF.b * toRed.z);
	F.g = (iF.r * toGreen.x + iF.g * toGreen.y + iF.b * toGreen.z);
	F.b = (iF.r * toBlue.x  + iF.g * toBlue.y  + iF.b * toBlue.z);

	// Second overlay-style blend, this time driven by the luma of F.
	const float N = dot(F.rgb, LumCoeff);
	float3 Cn;
	if (N < 0.5)
	{
		Cn = (2.0 * N - 1.0) * (F - F * F) + F;
	}
	else
	{
		Cn = (2.0 * N - 1.0) * (sqrt(F) - F) + F;
	}

	// Undo the initial linearisation.
	Cn = pow(abs(Cn), 1.0 / Linearization);

	return lerp(B, Cn, Strenght);
}

// Reinhard tonemapping with a white point:
//   (1 + K * x / W^2) * x / (x + K)
// where W = ReinhardWhitepoint and K = ReinhardScale.
float3 ReinhardToneMapping(in float3 x)
{
	const float whitePoint = ReinhardWhitepoint;	// Linear White Point Value
	const float scale = ReinhardScale;		// Scale

	// gamma space or not?
	const float3 whiteBoost = 1 + scale * x / (whitePoint * whitePoint);
	return whiteBoost * x / (x + scale);
}

// Reinhard tonemapping with a linear section: values up to
// ReinhardLinearPoint are scaled by ReinhardLinearSlope, values above it
// follow a Reinhard curve towards ReinhardLinearWhitepoint.
float3 ReinhardLinearToneMapping(in float3 x)
{
	const float whitePoint = ReinhardLinearWhitepoint;	// Linear White Point Value
	const float linearPoint = ReinhardLinearPoint;		// Linear point
	const float slope = ReinhardLinearSlope;		// Slope of the linear section

	// Scale (fixed so that the derivatives of the Reinhard and linear functions are the same at x = linearPoint)
	const float scale = (1 - linearPoint * slope) / slope;

	const float whiteRange = whitePoint - linearPoint;
	const float3 shoulder = linearPoint * slope + (1 - linearPoint * slope) * (1 + scale * (x - linearPoint) / (whiteRange * whiteRange)) * (x - linearPoint) / (x - linearPoint + scale);

	// gamma space or not?
	return (x > linearPoint) ? shoulder : slope * x;
}

// Filmic tonemapping curve: subtracts a 0.004 black offset, evaluates a
// rational polynomial and raises the result to the power 2.2.
float3 HaarmPeterDuikerFilmicToneMapping(in float3 x)
{
	const float3 c = max( (float3)0.0f, x - 0.004f );

	const float3 numerator   = c * ( 6.2f * c + 0.5f );
	const float3 denominator = c * ( 6.2f * c + 1.7f ) + 0.06;

	return pow( abs( numerator / denominator ), 2.2f );
}

// Rational-polynomial tonemapping curve with fixed coefficients:
//   ((x*(A*x+B)+C) / (x*(D*x+E)+F) - G) / H
float3 CustomToneMapping(in float3 x)
{
	// Numerator coefficients.
	const float A = 0.665f;
	const float B = 0.09f;
	const float C = 0.004f;
	// Denominator coefficients.
	const float D = 0.445f;
	const float E = 0.26f;
	const float F = 0.025f;
	// Output offset and divisor.
	const float G = 0.16f;//0.145f;
	const float H = 1.1844f;//1.15f;

	// gamma space or not?
	const float3 numerator = x*(A*x+B)+C;
	const float3 denominator = x*(D*x+E)+F;
	return ((numerator/denominator)-G) / H;
}

// Colour modification pass: scales chroma around grey, then applies an
// independent gamma, contrast and brightness to each channel.
//   color : input colour
// Returns the adjusted colour (not clamped).
float3 ColormodPass( float3 color )
{
	// Grey level: sum of the channels weighted by 0.333 each. Computed once.
	float grey = dot(color.xyz, 0.333);
	color.xyz = (color.xyz - grey) * ColormodChroma + grey;

	// Per channel: gamma, contrast about 0.5, then a brightness offset.
	color.x = (pow(color.x, ColormodGammaR) - 0.5) * ColormodContrastR + 0.5 + ColormodBrightnessR;
	// Fixed: green previously added ColormodBrightnessB, so the green
	// brightness setting was ignored and the blue one applied twice.
	color.y = (pow(color.y, ColormodGammaG) - 0.5) * ColormodContrastG + 0.5 + ColormodBrightnessG;
	color.z = (pow(color.z, ColormodGammaB) - 0.5) * ColormodContrastB + 0.5 + ColormodBrightnessB;
	return color;	
}

// Spherical tonemap: adds to each channel a copy of itself weighted by a
// half-circle profile (largest at 0.5, smallest at 0 and 1), scaled by
// sphericalAmount, then multiplies the result by 0.95.
float3 SphericalPass( float3 color )
{
	// Remap [0, 1] to [-1, 1] so the circle is centred on mid-grey.
	const float3 centred = color.rgb * 2.0 - 1.0;

	// Half-circle height, remapped from [0, 1] to [0.5, 1].
	const float3 dome = sqrt(1.0 - centred * centred) * 0.5 + 0.5;

	color.rgb += (dome * color.rgb) * sphericalAmount;
	color.rgb *= 0.95;
	return color;
}

// LeiFX colour reduction: adds a position-dependent dither offset to the
// colour and then quantises it to 32 levels for red and blue and 64 levels
// for green.
//   colorInput : input colour; alpha is passed through unchanged
//   tex        : texture coordinate, multiplied by the resolution derived from
//                ScreenSize to get the position used for the dither pattern
// NOTE(review): several locals below are computed but never contribute to the
// result (what, ditheu, vertline5, horzline4, dithfour, dithfive, oo, gg,
// eeee); they are left untouched here.
float4 LeiFX_Reduct( float4 colorInput, float2 tex )
{

	// Resolution (res) and its reciprocal (what); `what` is not used further down.
	float2 res;
	res.x = ScreenSize.x;
	res.y = ScreenSize.x*ScreenSize.z;
	float2 what;
	what.x = 1 / ScreenSize.x;
	what.y = 1 / (ScreenSize.x*ScreenSize.z);

	// Position used for the dither pattern (the component assignments repeat the vector multiply).
	float2 dithet = tex.xy * res.xy;

	dithet.x = tex.x * res.x;
	dithet.y = tex.y * res.y;

	// Same value as dithet; not read afterwards.
	float2 ditheu = tex.xy * res.xy;

	ditheu.x = tex.x * res.x;
	ditheu.y = tex.y * res.y;

	// 2x2 matrix?

	// Repeating ramps with period 2 or 4 along x (vertline*) and y (horzline*).
	// vertline3 and vertline4 are computed from the same expression.
	float vertline1 = 	(mod(dithet.x, 		2.0));
	float vertline2 = 	(mod(dithet.x+1, 	2.0));
	float vertline3 = 	(mod(dithet.x+1, 	4.0));
	float vertline4 = 	(mod(dithet.x+1,	4.0));
	float vertline5 = 	(mod(dithet.x-1,	4.0));
	float horzline1 = 	(mod(dithet.y, 		2.0));
	float horzline2 = 	(mod(dithet.y+1,  	2.0));
	float horzline3 = 	(mod(dithet.y,  	4.0));
	float horzline4 = 	(mod(dithet.y+1,  	4.0));
	float horzline5 = 	(mod(dithet.y-1,  	4.0));

	float vertline3a = 	(mod(dithet.x+3, 	4.0));
	float horzline3a = 	(mod(dithet.y+2,  	4.0));


	// Combine the ramps into the values that are thresholded below.
	// diththree, dithfive and dithsix start from the same sum.
	float dithone 		= vertline1 + horzline2;
	float dithtwo 		= vertline2 + horzline1;
	float diththree 	= vertline3 + horzline3;
	float dithfour 		= vertline4 + horzline5;
	float dithfive 		= vertline3 + horzline3;
	float dithsix 		= vertline3 + horzline3;
	float dithsixy 		= vertline3a + horzline3a;

	float3 ditherX, dithero, ditherv, ditherg, ditherx;

	dithone = dithone * 0.3;
	dithtwo = dithtwo * 0.3 + 1;
	dithone *= dithtwo;

	dithfour = dithfour * 0.3;
	dithfour *= dithfive;


	dithsix *= dithtwo;
	dithsixy *= dithtwo;

	dithfive *= dithtwo;

	dithfour = pow(dithfour, 2.0f);
	

	// Lamest crudest 'dither matrix' ever.

	// The estimated dither pattern

		//      . X . o . X . o . X . o . X
		//      v g x . v g x . v g x . v g
		//      . o . o . o . o . o . o . o
		//      x . v g x . v g x . v g x .
		//      . X . o . X . o . X . o . X
		//	v g v . v g v . v g v . v g 
	
	float3 dithapick;
	float3 XX, oo, vv, xx, gg;


	// Offsets for the pattern cells. oo and gg are assigned but the 'o' and 'g'
	// cells below use the literals -0.018f and -0.006f instead.
	XX = 0.018f;
	vv = 0.02f;
	xx = 0.015f;
	oo = -0.003f;
	gg = -60.93f;


	// Each ditherN starts as a broadcast of its scalar and is then replaced by
	// either zero or that cell's offset, depending on a threshold.
	ditherX = dithone;
	if (ditherX.b < 1.0f) ditherX.rgb = 0;
	else	ditherX.rgb = XX.rgb;

	ditherx = diththree;
	if (ditherx.r > 1.3f) ditherx.rgb = 0;
	else	ditherx.rgb = xx.rgb;

	ditherv = dithone;
	if (ditherv.b < 0.7f) ditherv.rgb = vv.rgb;
	else	ditherv.rgb = 0;




	dithero = dithsix;
	if (dithero.r > 2.2f) dithero.rgb = 0;
	else	dithero.rgb = -0.018f;

	ditherg = dithsixy;
	if (ditherg.r > 2.2f) ditherg.rgb = 0;
	else	ditherg.rgb = -0.006f;


	// The first assignment is overwritten immediately by the sum of all five cells.
	dithapick = ditherv;
	dithapick = ditherx + ditherX + ditherv + ditherg + dithero;

	// This is the stupidest set of hacks ever to get just this stupid dither
	// pattern. It obviously could be done better, but eh.......
	// i'm bad at math :(

	// Matrix numbers....
	float3 dithonme;
	float3 dithonyou;

	//dithonyou = dithapick.r + dithapick.g + dithapick.b;
	dithapick.b = dithapick.r;
	dithapick.b = dithapick.r;
	// NOTE(review): a float3 is added to a scalar and stored in a scalar here, so
	// the value is implicitly truncated (presumably to dithapick's first component).
	dithonme.r = colorInput.r + dithapick;
	dithonme.g = colorInput.g + dithapick;
	dithonme.b = colorInput.b + dithapick;


	float eeee = 0.034f;

	dithonyou.rgb = colorInput.rgb;

	// Together these two groups leave dithonyou as the per-channel maximum of
	// the original colour and the dithered colour.
	if (dithonyou.r > dithonme.r) dithonme.r = dithonyou.r;
	if (dithonyou.g > dithonme.g) dithonme.g = dithonyou.g;
	if (dithonyou.b > dithonme.b) dithonme.b = dithonyou.b;

	if (dithonyou.r < dithonme.r) dithonyou.r = dithonme.r;
	if (dithonyou.g < dithonme.g) dithonyou.g = dithonme.g;
	if (dithonyou.b < dithonme.b) dithonyou.b = dithonme.b;

	// Channels that are not above zero keep their original value.
	if (colorInput.r > 0) colorInput.r = dithonyou.r;
	if (colorInput.g > 0) colorInput.g = dithonyou.g;
	if (colorInput.b > 0) colorInput.b = dithonyou.b;

	//
	// Reduce to 16-bit color
	//

	// `why` is an exponent of 1, so the pow() calls below leave non-negative
	// values unchanged.
	float3 why = 1;
	float3 reduceme = 1;
	float radooct = 32;	// 32 is usually the proper value

	// Red: 32 levels.
	reduceme.r = pow(colorInput.r, why);  
	reduceme.r *= radooct;	
	reduceme.r = int(floor(reduceme.r));	
	reduceme.r /= radooct; 
	reduceme.r = pow(reduceme.r, why);

	// Green: 64 levels (radooct * 2).
	reduceme.g = pow(colorInput.g, why);  
	reduceme.g *= radooct * 2;	
	reduceme.g = int(floor(reduceme.g));	
	reduceme.g /= radooct * 2; 
	reduceme.g = pow(reduceme.g, why);

	// Blue: 32 levels.
	reduceme.b = pow(colorInput.b, why);  
	reduceme.b *= radooct;	
	reduceme.b = int(floor(reduceme.b));	
	reduceme.b /= radooct; 
	reduceme.b = pow(reduceme.b, why);

	colorInput.rgb = reduceme.rgb;

	// END REDUCTION

//	colorInput.r *= 1.1;

	return colorInput;
}


// LeiFX horizontal filter: blends the input with the average of three
// horizontally offset samples. The blend is strongest where the left and
// right samples are similar and fades out as their difference grows.
// Alpha is passed through unchanged.
float4 LeiFX_Filter( float4 colorInput, float2 tex )
{
	const float2 pixelsize = float2(ScreenSize.y,ScreenSize.y*ScreenSize.z);

	// Sample things.
	float3 sampleRight  = tex2D(SamplerColor, tex + float2((pixelsize.x * 0.47), 0)).rgb;
	float3 sampleLeft   = tex2D(SamplerColor, tex + float2(-pixelsize.x * 1.3, 0)).rgb;
	float3 sampleCentre = tex2D(SamplerColor, tex + float2(0, 0)).rgb;

	// Absolute difference of the channel sums of the two side samples, capped at 1.
	float mean = dot(sampleRight.rgb,1) - dot(sampleLeft.rgb,1);
	if (mean < 0)	mean *= -1;
	if (mean > 1) mean = 1;

	mean = pow(mean, 0.18f);	// Adjust this value if you want to control the blur...
	if (mean > 1) mean = 1;

	// Weight of the blurred colour (blurWeight) and of the input (keepWeight).
	const float blurWeight = 1 - mean;
	const float keepWeight = 1 - blurWeight;

	// Equal-weight average of the three samples.
	sampleCentre /= 3;
	sampleRight /= 3;
	sampleLeft /= 3;
	const float3 averaged = sampleCentre + sampleRight + sampleLeft;

	colorInput.rgb = (averaged * blurWeight) + (colorInput.rgb * keepWeight);
	return colorInput;
}

// LeiFX gamma with scanlines: applies gamma 1.3 to rows whose vertical
// position mod 2 is at least 1, and gamma 1.15 to the remaining rows.
// Alpha is passed through unchanged.
float4 LeiFX_Gamma( float4 colorInput, float2 tex )
{
	// moved the '4x1 line' stuff into here
	const float gammaed = 0.15;

	// Vertical position used for the alternating-line test.
	const float verticalRes = ScreenSize.x*ScreenSize.z;
	const float lineParity = mod(tex.y * verticalRes, 2.0);

	// The extra line gamma is dropped on rows where the parity is below 1.
	const float leifx_linegamma = (lineParity < 1) ? 0.0 : gammaed;
	const float leifx_gamma = 1.3 - gammaed + leifx_linegamma;

	colorInput.rgb = pow(colorInput.rgb, 1.0 / leifx_gamma);

	return colorInput;
}


// First bloom blur pass: 11 taps along the Y axis of SamplerColor's alpha
// channel, spaced 4 * pixelsize apart (offsets -20 .. +20).
//   tex       : texture coordinate of the current pixel
//   pixelsize : step size multiplied with the tap offsets
// Returns the weighted sum of the sampled alpha values.
float BloomPass1 (float2 tex, float2 pixelsize )
{
	// Symmetric tap weights, listed from offset -20 to offset +20.
	const float tapWeight[11] =
	{
		0.01222, 0.02783, 0.06559, 0.12098, 0.17467,
		0.19741,
		0.17467, 0.12098, 0.06559, 0.02783, 0.01222
	};

	float result = 0;

	for (int tap = 0; tap < 11; tap++)
	{
		float2 tapOffset = float2(0.0, (tap - 5) * 4.0);
		result += tex2D(SamplerColor, tex + tapOffset * pixelsize ).a * tapWeight[tap];
	}

	return result;
}


// Second bloom blur pass: 11 taps along the X axis of SamplerColor's alpha
// channel, spaced 4 * pixelsize apart (offsets -20 .. +20).
//   tex       : texture coordinate of the current pixel
//   pixelsize : step size multiplied with the tap offsets
// Returns the weighted sum of the sampled alpha values.
float BloomPass2 (float2 tex, float2 pixelsize )
{
	// Symmetric tap weights, listed from offset -20 to offset +20.
	const float tapWeight[11] =
	{
		0.01222, 0.02783, 0.06559, 0.12098, 0.17467,
		0.19741,
		0.17467, 0.12098, 0.06559, 0.02783, 0.01222
	};

	float result = 0;

	for (int tap = 0; tap < 11; tap++)
	{
		float2 tapOffset = float2((tap - 5) * 4.0, 0);
		result += tex2D(SamplerColor, tex + tapOffset * pixelsize ).a * tapWeight[tap];
	}

	return result;
}


// Hue-selective desaturation (prod80 ColorHueFX).
// Colours whose hue lies inside [hueMid - hueRange, hueMid + hueRange] keep
// (part of) their colour; everything else becomes the grey value computed
// with LumCoeff. When the window crosses 0 or 1 a second, wrapped window is
// tested as well. The result is mixed with the input by fxcolorMix.
//   color : input colour (clamped to 0..1 for the effect itself)
// Returns the mixed colour.
float3 colorhuefx_prod80( float3 color )
{
	float3 clamped = saturate( color.xyz );
	float  grey    = dot( clamped.xyz, LumCoeff.xyz );

	float3 hueData = Hue( clamped.xyz );
	float  hue     = hueData.x;
	float  inten   = hueData.z - hueData.y * 0.5;
	float  sat     = hueData.y / ( 1.0 - abs( inten * 2.0 - 1.0 ) * 1e-10 );

	// When colour intensity is not based on the original saturation level
	if ( USE_COLORSAT == 0 )   sat = 1.0f;

	// Primary hue window.
	float winLo = hueMid - hueRange;
	float winHi = hueMid + hueRange;
	// Wrapped window, only used when the primary one leaves 0..1.
	float wrapLo = 0.0f;
	float wrapHi = 0.0f;

	// Upper bound for how much of the original colour may be kept.
	float strength = sat * satLimit;

	// Fraction of the original colour to keep; 0 leaves pure grey.
	float keep = 0.0f;

	if ( winLo < 0.0 )
	{
		wrapLo = 1.0f + winLo;
		wrapHi = 1.0f + hueMid;

		if ( hue >= winLo && hue <= hueMid )
			keep = smootherstep( winLo, hueMid, hue ) * strength;
		else if ( hue >= hueMid && hue <= winHi )
			keep = ( 1.0f - smootherstep( hueMid, winHi, hue )) * strength;
		else if ( hue >= wrapLo && hue <= wrapHi )
			keep = smootherstep( wrapLo, wrapHi, hue ) * strength;
	}
	else if ( winHi > 1.0 )
	{
		wrapLo = 0.0f - ( 1.0f - hueMid );
		wrapHi = winHi - 1.0f;

		if ( hue >= winLo && hue <= hueMid )
			keep = smootherstep( winLo, hueMid, hue ) * strength;
		else if ( hue >= hueMid && hue <= winHi )
			keep = ( 1.0f - smootherstep( hueMid, winHi, hue )) * strength;
		else if ( hue >= wrapLo && hue <= wrapHi )
			keep = ( 1.0f - smootherstep( wrapLo, wrapHi, hue )) * strength;
	}
	else
	{
		if ( hue >= winLo && hue <= hueMid )
			keep = smootherstep( winLo, hueMid, hue ) * strength;
		else if ( hue > hueMid && hue <= winHi )
			keep = ( 1.0f - smootherstep( hueMid, winHi, hue )) * strength;
	}

	// keep == 0 reproduces the plain grey value.
	float3 isolated = lerp( grey.xxx, clamped.xyz, keep );

	color.xyz = lerp( color.xyz, isolated.xyz, fxcolorMix );

	return color.xyz;
}

// Converts a depth-buffer value into a linear distance using two hard-coded
// projection constants, subtracts a fixed offset and clamps at zero.
//   nonlinearDepth : value read from the depth sampler
// Returns the linearised depth, never negative.
float linearlizeDepth(float nonlinearDepth)
{
	// Projection constants (look like near / far plane distances - unconfirmed).
	const float projA = 0.0509804;
	const float projB = 3098.0392;
	// Distance bias constants.
	const float distA = 0.0;
	const float distB = 0.0509804;

	float invRange = 1.0 / (-projA + projB);
	float scale    = (invRange * projB) * -projA;

	float denom   = projB * -invRange + nonlinearDepth;
	float inverse = 1.0 / denom;

	float linearDepth = scale * inverse - distB;
	linearDepth += distA * -0.5;

	return max(linearDepth, 0.0);
}



//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// Pixel shaders
//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


#if (USE_PETKAGTADOF == 1)
// Depth of field with ring-sampled bokeh.
// Computes a blur factor from the linearised scene depth and the focus depth
// (manual ramps when DOF_MANUAL == 1, otherwise a lens formula built from
// focalLength, fstop and CoC), then averages colorDof() taps placed on
// 'rings' concentric rings around the pixel.
//   IN   : interpolated vertex output; IN.txcoord.xy is the screen UV
//   vPos : pixel position (unused here)
// Returns the blurred colour; alpha is the alpha of the centre sample.
//
// Changes against the previous version:
//  * every tap is now multiplied by the same weight that is added to the
//    divisor. Before, taps were summed unweighted while the divisor was
//    weighted, so the result was not a proper average when bias or the
//    pentagon mask changed the weights.
//  * i / rings is evaluated in floating point. With integer operands the
//    quotient is truncated, which is 0 for every ring but the last.
//  * only rgb is divided by the sample weight, so alpha is the same on the
//    blurred and on the sharp path.
float4 PS_ProcessDoFBokeh(VS_OUTPUT_POST IN, float2 vPos : VPOS) : COLOR 
{
	float depth = linearize(tex2D(SamplerDepth,IN.txcoord.xy).x);
	
	float fDepth = focalDepth;
	
	#if (DOF_AUTO == 1)
		fDepth = linearize(tex2D(SamplerDepth,focus).x);
	#endif
	
	float blur = 2.0;
	#if (DOF_MANUAL == 1)
		float a = depth-fDepth; //focal plane
		float b = (a-fdofstart)/fdofdist; //far DoF
		float c = (-a-ndofstart)/ndofdist; //near Dof
		blur = (a>0.0)?b:c;
	#else
		float f = focalLength; //focal length in mm
		float d = fDepth*1000.0; //focal plane in mm
		float o = depth*1000.0; //depth in mm
		
		float a = (o*f)/(o-f); 
		float b = (d*f)/(d-f); 
		float c = (d-f)/(d*fstop*CoC); 
		
		blur = abs(a-b)*c;
	#endif
	blur = saturate(blur);
	float2 noise = rand(IN.txcoord.xy)*namount*blur;
	
	// Tap spacing in texture space, jittered by the noise term.
	float w = (1.0/ScreenSize.x)*blur*maxblur+noise.x;
	float h = (1.0/ScreenSize.x*ScreenSize.z)*blur*maxblur+noise.y;
	
	// The centre sample is needed on both paths.
	float4 col = tex2D(SamplerColor, IN.txcoord.xy);
	
	// Below this blur amount the centre sample is returned as is (cheap path).
	if(blur >= 0.05)
	{
		float s = 1.0; // accumulated weight; the centre sample counts as 1
		for (int i = 1; i <= rings; i += 1)
		{
			int ringsamples = i * samples;
			float angleStep = PI*2.0 / ringsamples;
			// Ring weight: lerp between 1 and the normalised ring index.
			float ringWeight = lerp(1.0, float(i)/float(rings), bias);
			for (int j = 0 ; j < ringsamples ; j += 1)
			{
				float pw = cos(j*angleStep)*i;
				float ph = sin(j*angleStep)*i;
				float p = 1.0;
				#if (DOF_PENTAGONSHAPE == 1)
					p = penta(float2(pw,ph));
				#endif
				float tapWeight = ringWeight*p;
				col.xyz += colorDof(IN.txcoord.xy + float2(pw*w,ph*h),blur).xyz*tapWeight;
				s += tapWeight;
			}
		}
		col.xyz /= s; //divide by the accumulated sample weight
	}
	
	#if( DOF_VIGNETTING == 1)
		col *= vignette(IN.txcoord.xy,vignint);
	#endif
	
	return col;
}
#endif


#if (USE_MATSODOF==1)

// Fast depth of field pixel shader (Matso code)
// Fast depth of field pixel shader (Matso code).
// Blurs the pixel with four taps along one direction chosen by 'axis'
// (horizontal, vertical or one of the two diagonals), scaled by how far the
// pixel is out of focus according to DOF().
//   IN   : interpolated vertex output; IN.txcoord.xy is the screen UV
//   vPos : pixel position (unused here)
//   axis : index 0..3 into the direction table
// Returns the blurred colour with alpha forced to 1. With
// USE_SPLITSCREEN == 1 the right half returns the unprocessed sample and the
// left half returns the blurred colour with its accumulated alpha.
//
// Changes against the previous version: the per-tap depth fetch and DOF()
// evaluation whose result was never used are gone, and the tap weight is
// selected by a single #if / #else so 'w' is declared for every value of
// USE_BOKEH_DOF (values 0 and 1 behave as before).
float4 PS_ProcessPass_FastDoF(VS_OUTPUT_POST IN, float2 vPos : VPOS, uniform int axis) : COLOR
{
	float4 res;
	float2 coord = IN.txcoord.xy;
	float4 tcol = tex2D(SamplerColor, coord.xy);
	float sd = tex2D(SamplerDepth, coord).x;

	// Focus depth: screen centre when autofocus is on, otherwise 0, minus the bias.
	float sf = 0;

#if (USE_AUTOFOCUS == 1)
	sf = tex2D(SamplerDepth, 0.5).x;
#endif

#if ( USE_SMOOTH_DOF == 1)
	sf -= fFocusBias * 2.0;
#else
	sf -= fFocusBias;
#endif

	float outOfFocus = DOF(sd, sf);
		
	// Tap positions along the blur direction, and the four possible directions.
	float offset[4] = { -1.282, -0.524, 0.524, 1.282 };
	float2 tdirs[4] = { float2(1.0, 0.0), float2(0.0, 1.0), float2(0.707, 0.707), float2(-0.707, 0.707) };
	float blur = DOF_SCALE * outOfFocus;

	// Starting value of the divisor (the centre sample itself is added unweighted).
#if (USE_BOKEH_DOF==1)
	float wValue = (1.0 + pow(length(tcol.rgb) + 0.1, fBokehCurve)) * (1.0 - fBokehLight);	// special recipe from papa Matso ;)
#else
	float wValue = 1.0;
#endif
	
	// Convert the chosen direction to texel units.
	tdirs[axis].x *= fvTexelSize.x;
	tdirs[axis].y *= fvTexelSize.y;
	
#if( USE_BOKEH_DOF == 1)
	blur *= 0.25;
#endif
		
	for (int i = 0; i < 4; i++)
	{
		float2 tdir = offset[i] * tdirs[axis] * blur;
		coord.xy = IN.txcoord.xy + tdir.xy;
#if (USE_CHROMA_DOF == 1)
		float4 ct = ChromaticAberrationFocusPass(coord.xy, outOfFocus);
#else
		float4 ct = tex2D(SamplerColor, coord.xy);
#endif

#if (USE_BOKEH_DOF == 1)
		// pseudo-bokeh weighting: brighter taps weigh more
		float b = GrayScale(ct.rgb) + length(ct.rgb) + 0.1;
		float w = pow(b, fBokehCurve) + abs(offset[i]);
#else
		float w = 1.0 + abs(offset[i]);	// weight blur for better effect
#endif
		tcol += ct * w;
		wValue += w;
	}

	tcol /= wValue;

#if (USE_SPLITSCREEN==1)
return (IN.txcoord.x > 0.5) ? tex2D(SamplerColor, IN.txcoord) : tcol;
#endif
		
	res.xyz = tcol.xyz;
	res.w = 1.0;
	return res;
}

#endif


#if( USE_GP65CJ042DOF == 1)

// GP65CJ042 depth of field, focus pass.
// Determines the focus depth (average of the depth at FocusPoint and four
// surrounding taps, or a manual value when AUTO_FOCUS == 0), converts the
// pixel's own depth into a signed blur factor and stores it in alpha as
// factor * 0.5 + 0.5 (0.5 = in focus, below = nearer, above = farther).
// Red and blue are additionally shifted sideways for chromatic aberration.
//   IN   : interpolated vertex output; IN.txcoord.xy is the screen UV
//   vPos : pixel position (unused here)
// Returns rgb = colour, a = encoded blur factor.
//
// Fix: the autofocus taps used to add each offset onto the already shifted
// coordinate, which sampled centre, up, centre, right, centre. Each tap is
// now taken relative to FocusPoint, giving centre, up, down, right, left.
float4 PS_GPDOFFocus(VS_OUTPUT_POST IN, float2 vPos : VPOS) : COLOR
{
	float4 res;
	float2 coord=IN.txcoord.xy;

	#if (USE_SPLITSCREEN == 1)
	if(IN.txcoord.x > 0.5) return tex2D(SamplerColor, coord.xy);
	#endif

	float2 focusCenter=FocusPoint;

	float2 pixelSize=ScreenSize.y;
	pixelSize.y*=ScreenSize.z;
	
	const float2 offset[4]=
	{
		float2(0.0, 1.0),
		float2(0.0, -1.0),
		float2(1.0, 0.0),
		float2(-1.0, 0.0)
	};

	// Centre tap plus four neighbours, averaged (5 samples -> * 0.2).
	float resdepth=linearlizeDepth(tex2D(SamplerDepth, focusCenter.xy).x);
	for (int i=0; i<4; i++)
	{
		float2 uvsrc=focusCenter.xy + offset[i] * pixelSize.xy * FocusSampleRange;
		#if (NOT_BLURRING_SKY_MODE==1)
			resdepth+=linearlizeDepth(tex2D(SamplerDepth, uvsrc).x);
		#else
			resdepth+=min(linearlizeDepth(tex2D(SamplerDepth, uvsrc).x), DepthClip);
		#endif
	}
	resdepth*=0.2;

	float scenefocus=resdepth;
	#if (AUTO_FOCUS == 0)
	scenefocus = (ManualFocusDepth+1)*0.00001; // +1 so the result is not 0 for a setting of 0; 1 is the lowest value that works without bugs
	#endif

	float4 origcolor=tex2D(SamplerColor, coord.xy);
	float scenedepth=tex2D(SamplerDepth, IN.txcoord.xy).x;
	
	res.xyz=origcolor.xyz;

	float depth=linearlizeDepth(scenedepth);

	float focalPlaneDepth=scenefocus;
	float farBlurDepth=scenefocus*pow(4.0, FarBlurCurve);
	
	
	#if( TILT_SHIFT == 1)
		// Multiples of 90 degrees are treated as "no tilt".
		float shiftAngle=(frac(TiltShiftAngle / 90.0) == 0) ? 0.0 : TiltShiftAngle;
		float depthShift=1.0 + (0.5 - coord.x)*tan(-shiftAngle * 0.017453292);
		focalPlaneDepth*=depthShift;
		farBlurDepth*=depthShift;
	#endif
	
	
	// Signed blur factor: negative in front of the focal plane, 0..1 behind it.
	if(depth < focalPlaneDepth)
		res.w=(depth - focalPlaneDepth)/focalPlaneDepth;
	else
	{
		res.w=(depth - focalPlaneDepth)/(farBlurDepth - focalPlaneDepth);
		res.w=saturate(res.w);
	}

	res.w=res.w * 0.5 + 0.5;
	
	#if ( NOT_BLURRING_SKY_MODE == 1)
		// NOTE(review): this redefines DEPTH_OF_FIELD_QULITY for everything
		// that follows in the file, not only for this function - confirm
		// that this is intended.
		#define	DEPTH_OF_FIELD_QULITY 0
		res.w=(depth > 1000.0) ? 0.5 : res.w;
	#endif

	float blurAmount=abs(res.w * 2.0 - 1.0);

	float discRadius=blurAmount * float(DEPTH_OF_FIELD_QULITY) * RadiusSacleMultipiler;
	
	discRadius*=(depth < 0.5) ? (1.0 / max(NearBlurCurve, 1.0)) : 1.0;
	
	// Horizontal shift for the red (-) and blue (+) channel.
	float3 distortion=float3(-1.0, 0.0, 1.0);
	distortion*=ChromaticAberrationAmount*discRadius;

	origcolor=tex2D(SamplerColor, coord.xy + pixelSize.xy*distortion.x);
	origcolor.w=smoothstep(0.0, depth, origcolor.w);
	res.x=lerp(res.x, origcolor.x, origcolor.w);
	
	origcolor=tex2D(SamplerColor, coord.xy + pixelSize.xy*distortion.z);
	origcolor.w=smoothstep(0.0, depth, origcolor.w);
	res.z=lerp(res.z, origcolor.z, origcolor.w);
	
	return res;
}

// GP65CJ042 depth of field, bokeh blur pass.
// Reads the blur factor stored in alpha, and gathers taps on concentric
// rings (circular, or polygonal when POLYGONAL_BOKEH == 1) whose radius
// grows with the blur amount. Bright taps are boosted, outer rings can be
// emphasised through BokehBias / BokehBiasCurve.
//   IN   : interpolated vertex output; IN.txcoord.xy is the screen UV
//   vPos : pixel position (unused here)
// Returns rgb = weighted average of the taps, a = unchanged blur factor.
//
// Fix: the start of a new ring was detected with
// sampleCounterInCycle % sampleCycle == 0, which is 0 % 0 on the very first
// iteration. The counter only ever runs from 1 up to sampleCycle, so the
// equality test below selects exactly the same iterations without a modulo
// by zero.
float4 PS_GPDOFBokehblur(VS_OUTPUT_POST IN, float2 vPos : VPOS) : COLOR
{
	float4 res;
	
	float2 coord=IN.txcoord.xy;

	float4 origcolor=tex2D(SamplerColor, coord.xy);
	
	#if (USE_SPLITSCREEN == 1)
	if(IN.txcoord.x > 0.5) return origcolor;
	#endif

	float centerDepth=origcolor.w;

	float2 pixelSize=ScreenSize.y;
	pixelSize.y*=ScreenSize.z;
	
	// Alpha 0.5 means "in focus"; the distance from 0.5 is the blur amount.
	float blurAmount=abs(centerDepth * 2.0 - 1.0);
	float discRadius=blurAmount * float(DEPTH_OF_FIELD_QULITY);
	discRadius*=RadiusSacleMultipiler;
	
	discRadius*=(centerDepth < 0.5) ? (1.0 / max(NearBlurCurve, 1.0)) : 1.0;
	
	// Centre sample, brightened when above the bokeh threshold.
	res.xyz=origcolor.xyz;
	res.w=dot(res.xyz, 0.3333);
	res.w=max((res.w - BokehBrightnessThreshold) * BokehBrightnessMultipiler, 0.0);
	res.xyz*=1.0 + res.w*blurAmount;
	
	// From here on res.w accumulates the total weight.
	res.w=1.0;
	
	int sampleCycle=0;          // number of taps on the current ring
	int sampleCycleCounter=0;   // index of the current ring (1-based)
	int sampleCounterInCycle=0; // taps already taken on the current ring
	
	#if ( POLYGONAL_BOKEH == 1)
		float basedAngle=360.0 / POLYGON_NUM;
		float2 currentVertex;
		float2 nextVertex;
	
		int	dofTaps=DEPTH_OF_FIELD_QULITY * (DEPTH_OF_FIELD_QULITY + 1) * POLYGON_NUM / 2.0;
	#else
		int	dofTaps=DEPTH_OF_FIELD_QULITY * (DEPTH_OF_FIELD_QULITY + 1) * 4;
	#endif
		
	
	for(int i=0; i < dofTaps; i++)
	{
		// Ring finished (or very first tap): advance to the next ring.
		if(sampleCounterInCycle == sampleCycle) 
		{
			sampleCounterInCycle=0;
			sampleCycleCounter++;
		
			#if ( POLYGONAL_BOKEH == 1)
				sampleCycle+=POLYGON_NUM;
				currentVertex.xy=float2(1.0 , 0.0);
				sincos(basedAngle* 0.017453292, nextVertex.y, nextVertex.x);	
			#else	
				sampleCycle+=8;
			#endif
		}
		sampleCounterInCycle++;
		
		#if (POLYGONAL_BOKEH==1)
			// Walk along the polygon edge between two neighbouring vertices.
			float sampleAngle=basedAngle / float(sampleCycleCounter) * sampleCounterInCycle;
			float remainAngle=frac(sampleAngle / basedAngle) * basedAngle;
		
			if(remainAngle == 0)
			{
				currentVertex=nextVertex;
				sincos((sampleAngle +  basedAngle) * 0.017453292, nextVertex.y, nextVertex.x);
			}

			float2 sampleOffset=lerp(currentVertex.xy, nextVertex.xy, remainAngle / basedAngle);
		#else
			// 8 * ring taps spread evenly over the circle.
			float sampleAngle=0.78539816 / float(sampleCycleCounter) * sampleCounterInCycle;
			float2 sampleOffset;
			sincos(sampleAngle, sampleOffset.y, sampleOffset.x);
		#endif
		
		sampleOffset*=sampleCycleCounter / float(DEPTH_OF_FIELD_QULITY);
		float2  coordLow=coord.xy + (pixelSize.xy * sampleOffset.xy * discRadius);
		float4 tap=tex2D(SamplerColor, coordLow.xy);
		
		// Taps with a smaller alpha than the centre only count by their own blur amount.
		float weight=(tap.w >= centerDepth) ? 1.0 : abs(tap.w * 2.0 - 1.0);
		
		float luma=dot(tap.xyz, 0.3333);
		float brightMultipiler=max((luma - BokehBrightnessThreshold) * BokehBrightnessMultipiler, 0.0);
		tap.xyz*=1.0 + brightMultipiler*abs(tap.w*2.0 - 1.0);
		
		weight*=1.0 + BokehBias * pow(float(sampleCycleCounter)/float(DEPTH_OF_FIELD_QULITY), BokehBiasCurve);
		
		
	    res.xyz+=tap.xyz * weight;
	    res.w+=weight;
	}

	res.xyz /= res.w;
		
	// Hand the blur factor on to the following passes.
	res.w=centerDepth;


	return res;
}


// GP65CJ042 depth of field, horizontal post blur.
// 9-tap blur along X whose tap spacing scales with the blur factor stored
// in alpha and with BokehPostBlur.
//   IN   : interpolated vertex output; IN.txcoord.xy is the screen UV
//   vPos : pixel position (unused here)
// Returns the blurred colour; alpha keeps the incoming blur factor.
float4 PS_GPDOFGaussianH(VS_OUTPUT_POST IN, float2 vPos : VPOS) : COLOR
{
	float2 uv    = IN.txcoord.xy;
	float2 texel = float2(ScreenSize.y, ScreenSize.y * ScreenSize.z);

	float4 center = tex2D(SamplerColor, uv.xy);

	#if (USE_SPLITSCREEN == 1)
	if(IN.txcoord.x > 0.5) return center;
	#endif

	// Alpha 0.5 means "in focus"; the distance from 0.5 drives the spread.
	float storedFactor = center.w;
	float spread = abs(storedFactor*2.0 - 1.0);

	#if (DEPTH_OF_FIELD_QULITY > 0)
		if (storedFactor < 0.5)
			spread *= 1.0 / max(NearBlurCurve, 1.0);
		spread = smoothstep(0.15, 1.0, spread);
	#endif

	spread *= BokehPostBlur;

	// Centre weight followed by the weights of taps 1..4 on either side.
	const float kernel[5] = {0.2270270270, 0.1945945946, 0.1216216216, 0.0540540541,
		0.0162162162};

	float4 sum = center * kernel[0];

	for(int tap=1; tap < 5; tap++)
	{
		float2 shift = float2(tap*texel.x*spread, 0);
		sum += tex2D(SamplerColor, uv.xy + shift) * kernel[tap];
		sum += tex2D(SamplerColor, uv.xy - shift) * kernel[tap];
	}

	sum.w = storedFactor;

	return sum;
}

// GP65CJ042 depth of field, vertical post blur plus noise.
// 9-tap blur along Y whose tap spacing scales with the blur factor stored
// in alpha and with BokehPostBlur, followed by a noise modulation read from
// SamplerNoise that fades out on bright pixels.
//   IN   : interpolated vertex output; IN.txcoord.xy is the screen UV
//   vPos : pixel position (unused here)
// Returns the blurred colour; alpha keeps the incoming blur factor.
float4 PS_GPDOFGaussianV(VS_OUTPUT_POST IN, float2 vPos : VPOS) : COLOR
{
	float2 uv    = IN.txcoord.xy;
	float2 texel = float2(ScreenSize.y, ScreenSize.y * ScreenSize.z);

	float4 center = tex2D(SamplerColor, uv.xy);

	#if (USE_SPLITSCREEN == 1)
	if(IN.txcoord.x > 0.5) return center;
	#endif

	// Alpha 0.5 means "in focus"; the distance from 0.5 drives the spread.
	float storedFactor = center.w;
	float spread = abs(storedFactor*2.0 - 1.0);

	#if (DEPTH_OF_FIELD_QULITY > 0)
		if (storedFactor < 0.5)
			spread *= 1.0 / max(NearBlurCurve, 1.0);
		spread = smoothstep(0.15, 1.0, spread);
	#endif

	spread *= BokehPostBlur;

	// Centre weight followed by the weights of taps 1..4 on either side.
	const float kernel[5] = {0.2270270270, 0.1945945946, 0.1216216216, 0.0540540541,
		0.0162162162};

	float4 sum = center * kernel[0];

	for(int tap=1; tap < 5; tap++)
	{
		float2 shift = float2(0, tap*texel.y*spread);
		sum += tex2D(SamplerColor, uv.xy + shift) * kernel[tap];
		sum += tex2D(SamplerColor, uv.xy - shift) * kernel[tap];
	}

	// Compressed brightness of the blurred pixel, x / (x + 1).
	float brightness = dot(sum.xyz, 0.3333);
	brightness /= brightness + 1.0;

	// Noise lookup, offset by the brightness.
	float2 noiseUV = IN.txcoord.xy*16.0 + brightness;
	float4 noiseTexel = tex2D(SamplerNoise, noiseUV);

	float noiseStrength = NoiseAmount*pow(spread, NoiseCurve);
	sum = lerp(sum, (noiseTexel.x+0.5)*sum, noiseStrength*saturate(1.0-brightness*1.8));

	sum.w = storedFactor;

	return sum;
}

#endif


// Colour grading pixel shader.
// Runs every colour pass that is switched on through its USE_* macro, in a
// fixed order, on the pixel read from SamplerColor. With USE_BLOOM == 1 the
// alpha channel is additionally filled with the thresholded bloom source:
// three small tiles on the screen diagonal receive the red, green and blue
// channel of the 16x minified image.
//   IN : interpolated vertex output; IN.txcoord.xy is the screen UV
// Returns the graded colour (alpha = bloom source when bloom is enabled).
float4 PS_Colors(VS_OUTPUT_POST IN) : COLOR
{
	float2 uv        = IN.txcoord.xy;
	float2 pixelsize = float2(ScreenSize.y,ScreenSize.y*ScreenSize.z);

	float4 graded = tex2D(SamplerColor, uv);

	#if (USE_SPLITSCREEN == 1)
	if(IN.txcoord.x > 0.5) return graded;
	#endif

	#if (USE_CARTOON == 1)
	graded.xyz = CartoonPass(graded.xyz, uv, pixelsize.xy);
	#endif

	// ---- colour passes ----

	#if (USE_LEVELS== 1)
	graded.xyz = LevelsPass(graded.xyz);
	#endif

	#if (USE_TECHNICOLOR == 1)
	graded.xyz = TechnicolorPass(graded.xyz);
	#endif

	#if (USE_DPX == 1)
	graded.xyz = DPXPass(graded.xyz);
	#endif

	#if (USE_MONOCHROME == 1)
	graded.xyz = dot(graded.xyz, 0.333);
	#endif

	#if (USE_LIFTGAMMAGAIN == 1)
	graded.xyz = LiftGammaGainPass(graded.xyz);
	#endif

	#if (USE_TONEMAP == 1)
	graded.xyz = TonemapPass(graded.xyz);
	#endif

	#if (USE_VIBRANCE == 1)
	graded.xyz = VibrancePass(graded.xyz);
	#endif

	#if (USE_CURVES == 1)
	graded.xyz = CurvesPass(graded.xyz);
	#endif

	#if (USE_SEPIA == 1)
	graded.xyz = SepiaPass(graded.xyz);
	#endif

	#if (USE_SKYRIMTONEMAP == 1)
	graded.xyz = SkyrimTonemapPass(graded.xyz);
	#endif

	#if (USE_COLORMOOD == 1)
	graded.xyz = MoodPass(graded.xyz);
	#endif

	#if (USE_CROSSPROCESS == 1)
	graded.xyz = CrossPass(graded.xyz);
	#endif

	#if (USE_FILMICPASS == 1)
	graded.xyz = FilmPass(graded.xyz);
	#endif

	#if (USE_REINHARDLINEAR == 1)
	graded.xyz = ReinhardLinearToneMapping(graded.xyz);
	#endif

	#if (USE_REINHARD == 1)
	graded.xyz = ReinhardToneMapping(graded.xyz);
	#endif

	#if (USE_HPD == 1)
	graded.xyz = HaarmPeterDuikerFilmicToneMapping(graded.xyz);
	#endif

	#if (USE_FILMICCURVE == 1)
	graded.xyz = CustomToneMapping(graded.xyz);
	#endif

	#if (USE_COLORMOD == 1)
	graded.xyz = ColormodPass(graded.xyz);
	#endif

	#if (USE_SPHERICALTONEMAP == 1)
	graded.xyz = SphericalPass(graded.xyz);
	#endif

	#if (USE_LEIFX == 1)
	graded = LeiFX_Reduct(graded, uv);
	#endif

	#if (USE_BLOOM == 1)
	// Each tile spans 1/16 of the screen; uv*16 - k maps it back to 0..1.
	float bloomSource = 0;
	if(all(uv > 0.125) && all(uv < 0.1875)) bloomSource += tex2D(SamplerColor, uv*16-2).x;
	if(all(uv > 0.25) && all(uv < 0.3125)) bloomSource += tex2D(SamplerColor, uv*16-4).y;
	if(all(uv > 0.375) && all(uv < 0.4375)) bloomSource += tex2D(SamplerColor, uv*16-6).z;
	graded.a = max(bloomSource - fBloomThreshold, 0);
	#endif

	return graded;
}

// Lighting effects pixel shader.
// Depending on the USE_* macros this applies, in order:
//   * lens distortion with per-channel chromatic aberration,
//   * god rays marched from the pixel towards the screen centre,
//   * a 19-element lens flare,
// and, with USE_BLOOM == 1, writes the first bloom blur pass into alpha.
//   IN : interpolated vertex output; IN.txcoord.xy is the screen UV
// Returns the lit colour.
//
// Changes against the previous version (visible output unchanged):
//  * the depth fetch at the top was never used and is gone;
//  * the chromatic aberration block no longer computes x / y and an alpha
//    sample that were thrown away;
//  * the god ray loop counter is no longer called 'x', which collided with
//    the 'float x' of the chromatic aberration block when both are enabled;
//  * the god ray depth sample is only fetched when GODRAYDEPTHCHECK uses it.
float4 PS_Lighting(VS_OUTPUT_POST IN) : COLOR
{

	//global variables
	float2 pixelsize 	= float2(ScreenSize.y,ScreenSize.y*ScreenSize.z);

	float4 color 		= tex2D(SamplerColor, IN.txcoord.xy);

	#if (USE_SPLITSCREEN == 1)
	if(IN.txcoord.x > 0.5) return color;
	#endif

	#if (USE_BLOOM == 1)
	float tempalpha = BloomPass1(IN.txcoord.xy, pixelsize/4);
	#endif

	#if (USE_CHROMATICABBERATION == 1)
	// Per-channel scale: red is displaced most, blue least.
	float3 eta = float3(1.0+ChromaticAmount*0.9,1.0+ChromaticAmount*0.6,1.0+ChromaticAmount*0.3);
	float2 center = IN.txcoord.xy - 0.5;
	float LensZoom = 1.0/LensSize;

	// Squared distance from the screen centre.
	float r2 = center.x * center.x + center.y * center.y;
	float f = 0;

	if( LensDistortionCubic == 0.0){
		f = 1 + r2 * LensDistortion;
	}else{
		f = 1 + r2 * (LensDistortion + LensDistortionCubic * sqrt(r2));
	}

	float2 rCoords = (f*eta.r)*LensZoom*(center.xy*0.5)+0.5;
	float2 gCoords = (f*eta.g)*LensZoom*(center.xy*0.5)+0.5;
	float2 bCoords = (f*eta.b)*LensZoom*(center.xy*0.5)+0.5;

	color.xyz = float3(tex2D(SamplerColor,rCoords).r, tex2D(SamplerColor,gCoords).g, tex2D(SamplerColor,bCoords).b);
	#endif

	#if( USE_GODRAYS == 1)
	float2 ScreenLightPos = float2(0.5, 0.5);
	float2 texCoord = IN.txcoord.xy;
	float2 deltaTexCoord = (texCoord.xy - ScreenLightPos.xy);
	deltaTexCoord *= 1.0 / (float)GodraySamples * GodrayDensity;

	float illuminationDecay = 1.0;

	for(int raySample = 0; raySample < GodraySamples; raySample++) {
	
		// Step towards the light position.
		texCoord -= deltaTexCoord;
		float4 sample2 = tex2D(SamplerColor, texCoord.xy);
		// Brightness above the threshold goes into w.
		sample2.w = saturate(dot(sample2.xyz, 0.3333) - GodrayThreshold);
		// Slight warm tint.
		sample2.xyz *= float3(1.0, 0.95, 0.85);
		sample2 *= illuminationDecay * GodrayWeight;
		#if (GODRAYDEPTHCHECK == 1)
		float sampledepth = tex2D(SamplerDepth, texCoord.xy).x;
		if(sampledepth>0.97) color.xyz += sample2.xyz*sample2.w;
		#else
		color += sample2;
		#endif
		illuminationDecay *= GodrayDecay;
	}
	#endif

	#if (USE_LENZFLARE == 1)

	// Per flare element: x scales the offset from the centre, y * 3.5 is the
	// exponent applied to the centre distance, z is a final multiplier.
	float3 lfoffset[19]={
		float3(0.9, 0.01, 4),
		float3(0.7, 0.25, 25),
		float3(0.3, 0.25, 15),
		float3(1, 1.0, 5),
		float3(-0.15, 20, 1),
		float3(-0.3, 20, 1),
		float3(6, 6, 6),
		float3(7, 7, 7),
		float3(8, 8, 8),
		float3(9, 9, 9),
		float3(0.24, 1, 10),
		float3(0.32, 1, 10),
		float3(0.4, 1, 10),
		float3(0.5, -0.5, 2),
		float3(2, 2, -5),
		float3(-5, 0.2, 0.2),
		float3(20, 0.5, 0),
		float3(0.4, 1, 10),
		float3(0.00001, 10, 20)
	};

	// Per flare element: rgb tint / strength.
	float3 lffactors[19]={
		float3(1.5, 1.5, 0),
		float3(0, 1.5, 0),
		float3(0, 0, 1.5),
		float3(0.2, 0.25, 0),
		float3(0.15, 0, 0),
		float3(0, 0, 0.15),
		float3(1.4, 0, 0),
		float3(1, 1, 0),
		float3(0, 1, 0),
		float3(0, 0, 1.4),
		float3(1, 0.3, 0),
		float3(1, 1, 0),
		float3(0, 2, 4),
		float3(0.2, 0.1, 0),
		float3(0, 0, 1),
		float3(1, 1, 0),
		float3(1, 1, 0),
		float3(0, 0, 0.2),
		float3(0.012,0.313,0.588)
	};

	float3 lenstemp = 0;

	float2 lfcoord = float2(0,0);
	float2 distfact=(IN.txcoord.xy-0.5);
	distfact.x *= ScreenSize.z;

	for (int i=0; i<19; i++)
	{
		lfcoord.xy=lfoffset[i].x*distfact;
		lfcoord.xy*=pow(2.0*length(float2(distfact.x,distfact.y)), lfoffset[i].y*3.5);
		lfcoord.xy*=lfoffset[i].z;
		lfcoord.xy=0.5-lfcoord.xy;
		// Fade the element out towards the border of the sampled position.
		float2 tempfact = (lfcoord.xy-0.5)*2;
		float templensmult = clamp(1.0-dot(tempfact,tempfact),0,1);
		float3 lenstemp1 = dot(tex2D(SamplerColor, lfcoord.xy).xyz,0.333);

		#if (LENZDEPTHCHECK == 1)
		float templensdepth = tex2D(SamplerDepth, lfcoord.xy).x;
		if(templensdepth < 0.97) lenstemp1 = 0;
		#endif	
	
		lenstemp1 = max(0,lenstemp1.xyz - LenzThreshold);
		lenstemp1 *= lffactors[i].xyz*templensmult;

		lenstemp += lenstemp1;
	}

	color.xyz += lenstemp.xyz*LenzIntensity;

	#endif
	
	#if (USE_BLOOM == 1)
	color.a = tempalpha;
	#endif

	return color;

}

// Anamorphic lens flare pixel shader (Matso code)
// Anamorphic lens flare pixel shader (Matso code).
// Adds up AnamorphicSample() at the pixel and at four rows above and below
// it, tints the sum with fFlareTint and scales it by fFlareIntensity.
//   IN   : interpolated vertex output; IN.txcoord.xy is the screen UV
//   vPos : pixel position (unused here)
//   axis : forwarded to AnamorphicSample()
// Returns rgb = flare colour, a = 0. With USE_SPLITSCREEN == 1 the right
// half of the screen returns 0.
//
// Fix: the depth test assigned the whole float4 returned by tex2D to a
// float; the red channel is now selected explicitly like everywhere else in
// this file.
float4 PS_Anamorphic(VS_OUTPUT_POST IN, float2 vPos : VPOS, uniform int axis) : COLOR
{
	float4 res = 0;

	#if (USE_SPLITSCREEN == 1)
	if(IN.txcoord.x > 0.5) return res;
	#endif

	float2 coord = IN.txcoord.xy;
	float2 pixelsize = float2(ScreenSize.y,ScreenSize.y*ScreenSize.z);

	// NOTE(review): the centre tap is added with weight 1, gaussweight[0] is
	// never applied. Left untouched because it changes the flare brightness -
	// confirm whether that is intended.
	float3 anamFlare = AnamorphicSample(axis, coord.xy, fFlareBlur) * fFlareTint;
	float gaussweight[5] = {0.2270270270, 0.1945945946, 0.1216216216, 0.0540540541, 0.0162162162};

	for(int i=1; i < 5; i++)
	{
		anamFlare+=AnamorphicSample(axis, coord.xy + float2(0, i * pixelsize.y), fFlareBlur) * fFlareTint* gaussweight[i];
		anamFlare+=AnamorphicSample(axis, coord.xy - float2(0, i * pixelsize.y), fFlareBlur) * fFlareTint* gaussweight[i];
	}

	res.rgb = anamFlare * fFlareIntensity;

	#if (ANAMFLAREDEPTHCHECK == 1)
	// Pixels with a depth value below 0.97 get no flare.
	float depth = tex2D(SamplerDepth, coord.xy).x;
	if(depth< 0.97) res.xyz = 0;
	#endif

	return res;
}

// Image pixel shader: optional sharpening and film grain; with
// USE_BLOOM == 1 the second bloom blur pass is written into alpha.
//   IN : interpolated vertex output; IN.txcoord.xy is the screen UV
// Returns the processed colour.
float4 PS_Image(VS_OUTPUT_POST IN) : COLOR
{
	float2 uv        = IN.txcoord.xy;
	float2 pixelsize = float2(ScreenSize.y,ScreenSize.y*ScreenSize.z);

	float4 image = tex2D(SamplerColor, uv);

	#if (USE_SPLITSCREEN == 1)
	if(IN.txcoord.x > 0.5) return image;
	#endif

	#if (USE_BLOOM == 1)
	float bloomAlpha = BloomPass2(uv, pixelsize/4);
	#endif

	#if (USE_SHARPENING == 1)
	image.xyz = SharpPass(image.xyz, uv, pixelsize.xy);
	#endif

	#if(USE_GRAIN == 1)
	// Three random values from seeds shifted over time along y, x and both.
	float  grainTime = Timer.x * fGrainMotion;
	float2 grainBase = uv * 1.0;
	float  noiseA = random( grainBase + float2( 0.0, grainTime ) );
	float  noiseB = random( grainBase + float2( grainTime, 0.0 ) );
	float  noiseC = random( grainBase + float2( grainTime, grainTime ) );

	// Grey grain is the mean of the three, coloured grain uses one per channel.
	float  noiseMean  = ( noiseA + noiseB + noiseC ) * 0.333333333;
	float3 greyGrain  = float3( noiseMean, noiseMean, noiseMean );
	float3 colorGrain = float3( noiseA, noiseB, noiseC );

	// Subtracting half the intensity offsets the added noise.
	image.rgb += ( lerp( greyGrain, colorGrain, fGrainSaturation ) * fGrainIntensity ) - ( fGrainIntensity * 0.5);
	#endif

	#if (USE_BLOOM == 1)
	image.a = bloomAlpha;
	#endif

	return image;
}


// Overlay pixel shader: last stage of the chain in this file.
// Depending on the USE_* macros it composites the bloom stored in alpha,
// applies the LeiFX gamma, the explosion pass, the "Sin City" look, the
// hue isolation effect, one of two vignettes, a one-pixel border and
// movie bars, in that order.
//   IN : interpolated vertex output; IN.txcoord.xy is the screen UV
// Returns the final colour.
float4 PS_Overlay(VS_OUTPUT_POST IN) : COLOR
{

	//global variables
	float2 pixelsize 	= float2(ScreenSize.y,ScreenSize.y*ScreenSize.z);

	float4 color 		= tex2D(SamplerColor, IN.txcoord.xy);

	#if (USE_SPLITSCREEN == 1)
	if(IN.txcoord.x > 0.5) return color;
	#endif

	#if (USE_BLOOM == 1)
	// Read back the three bloom tiles (1/16 of the screen each) that
	// PS_Colors filled at 0.125, 0.25 and 0.375 on the screen diagonal.
	float3 bloom = 0;
	bloom.x	= tex2D(SamplerColor, IN.txcoord.xy/16+0.125).a;
	bloom.y	= tex2D(SamplerColor, IN.txcoord.xy/16+0.250).a;
	bloom.z	= tex2D(SamplerColor, IN.txcoord.xy/16+0.375).a;
	float bloomgray = dot(bloom.xyz, 0.333);
	bloom.xyz = saturate(lerp(bloomgray.xxx, bloom.xyz, fBloomSaturation));
	bloom.xyz *= fBloomTint.xyz;
	// MIXMODE 1: additive blend
	#if (MIXMODE == 1)
	color.xyz += bloom.xyz * fBloomPower;
	#endif
	// MIXMODE 2: screen blend, 1 - (1 - a) * (1 - b)
	#if (MIXMODE == 2)
	float3 toBlend = saturate(bloom.xyz * fBloomPower);
	color.xyz = 1-(1-color.xyz)*(1-toBlend.xyz);
	#endif
	#endif

	#if (USE_LEIFX == 1)
	color = LeiFX_Gamma(color,IN.txcoord.xy); 
	#endif

	#if (USE_EXPLOSION == 1)
	color.xyz = ExplosionPass(color.xyz, IN.txcoord.xy, pixelsize.xy);
	#endif

	#if (USE_SINCITY == 1)
	// Pixels where red clearly dominates stay red (boosted by 1.5),
	// everything else turns into its luminance.
	float sinlumi = dot(color.rgb, float3(0.30f,0.59f,0.11f));
	if(color.r > (color.g + 0.2f) && color.r > (color.b + 0.025f))
	{
		color.rgb = float3(sinlumi, 0, 0)*1.5;
	}
	else
	{
		color.rgb = sinlumi;
	}
	#endif

	#if (USE_COLORHUEFX == 1)
	color.xyz = colorhuefx_prod80(color.xyz);
	#endif

	#if (USE_BORISVIGNETTE==1)
	// Radial vignette: squared distance from the centre, shaped by
	// EVignetteCurve, blended towards EVignetteColor.
        float2	uv=(IN.txcoord-0.5)*EVignetteRadius;
	float	vignetteold=saturate(dot(uv.xy, uv.xy));
	vignetteold=pow(vignetteold, EVignetteCurve);
	#if (VIGNCOLORING==1)
	float3	EVignetteColor=float3(VIGNREDAMOUNT, VIGNGREENAMOUNT, VIGNBLUEAMOUNT);
	#else
	float3	EVignetteColor=float3(0.0, 0.0, 0.0);
	#endif
	color.xyz=lerp(color.xyz, EVignetteColor, vignetteold*EVignetteAmount);
	#endif	

	#if (USE_HD6_VIGNETTE==1)
	float rovigpwr = CircularPower; //for a circular vignette
	float2 sqvigpwr = float2( SquareTop, SquareBottom ); // for the top and bottom of the screen
	float vsatstrength = ColorDistortion; // color distortion
	float vignettepow = ContrastSharpen; // increases the contrast and sharpness
	float vstrengthatnight = VignetteBorder;
 
 	float2 inTex = IN.txcoord;
 	float vhnd = 0.5;
 	float4 voriginal = color;
 	float4 vcolor = voriginal;
 	vcolor.xyz=1;
 	inTex -= 0.5; // center
 	inTex.y += 0.01; // offset from the center
 	float vignette = 1.0 - dot( inTex, inTex );
 	vcolor *= pow( vignette, vignettepow );
 
 	float4 rvigtex = vcolor;
 	rvigtex.xyz = pow( vcolor, 1 );
 	rvigtex.xyz = lerp(float3(0.5, 0.5, 0.5), rvigtex.xyz, 2.25); // contrast
 	rvigtex.xyz = lerp(float3(1,1,1),rvigtex.xyz,rovigpwr); // strength of the circular vignette
 
	//darken the top and bottom
 	float4 vigtex = vcolor;
 	vcolor.xyz = float3(1,1,1);

	// NOTE(review): LEFTANDRIGHT, TOPANDBOTTOM and CORNERDARKEN each declare
	// topv / botv, so they look mutually exclusive - confirm in the settings.
	#if (LEFTANDRIGHT==1)
 	float3 topv = min((inTex.x+0.5)*2,1.5) * 2; // top
 	float3 botv = min(((0-inTex.x)+0.5)*2,1.5) * 2; // bottom
	topv= lerp(float3(1,1,1), topv, sqvigpwr.x);
 	botv= lerp(float3(1,1,1), botv, sqvigpwr.y);
	vigtex.xyz = (topv)*(botv);
	#endif
	#if (TOPANDBOTTOM==1)
        float3 topv = min((inTex.y+0.5)*2,1.5) * 2; // top
 	float3 botv = min(((0-inTex.y)+0.5)*2,1.5) * 2; // bottom
	topv= lerp(float3(1,1,1), topv, sqvigpwr.x);
 	botv= lerp(float3(1,1,1), botv, sqvigpwr.y);
	vigtex.xyz = (topv)*(botv);
	#endif
	#if (CORNERDARKEN==1)
	float3 rightv = min((inTex.x+0.5)*2,1.5) * 2;
 	float3 leftv = min(((0-inTex.x)+0.5)*2,1.5) * 2; 
        float3 topv = min((inTex.y+0.5)*2,1.5) * 2; 
 	float3 botv = min(((0-inTex.y)+0.5)*2,1.5) * 2; 
 	rightv= lerp(float3(1,1,1), rightv, sqvigpwr.y);
 	leftv= lerp(float3(1,1,1), leftv, sqvigpwr.x);
        topv= lerp(float3(1,1,1), topv, sqvigpwr.x);
 	botv= lerp(float3(1,1,1), botv, sqvigpwr.y);
 	vigtex.xyz = (topv)*(botv)*(rightv)*(leftv);
	#endif
 	
	// mix the two types of vignettes
 	vigtex.xyz*=rvigtex.xyz;
	vigtex.xyz = lerp(vigtex.xyz,float3(1,1,1),(vhnd-vstrengthatnight*vhnd)); //for a dark screen
 	vigtex.xyz = min(vigtex.xyz,1);
 	vigtex.xyz = max(vigtex.xyz,0);
	// Luminance of the unvignetted pixel; the lerp below shifts the
	// saturation depending on the vignette strength.
 	float3 vtintensity = dot(voriginal.xyz, float3(0.2125, 0.7154, 0.0721));
 	color.xyz = lerp(vtintensity, voriginal.xyz, ((((1-(vigtex.xyz*2))+2)-1)*vsatstrength)+1 );
  	color.xyz *= (vigtex.xyz);
	#endif

	#if (USE_BORDER==1)
	// Blacken pixels lying within one pixelsize of a screen edge.
	float2 distancefromcenter = abs(IN.txcoord.xy - 0.5);
	bool2 screen_border = step(0.5 - pixelsize,distancefromcenter);
	color.xyz = (!dot(screen_border, 1.0)) ? color.xyz : 0.0;
	#endif

	#if (USE_MOVIEBARS == 1)
	// Black bars over the top and bottom 12 % of the screen.
	color.xyz = IN.txcoord.y > 0.12 && IN.txcoord.y < 0.88 ? color.xyz : 0.0;
	#endif
	
	return color;

}

//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// Techniques
//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#if (USE_GP65CJ042DOF == 0 && USE_MATSODOF == 0 && USE_PETKAGTADOF == 0)
// Stage 1 when no depth of field variant is enabled: colour grading.
technique PostProcess
{
	pass P0
	{
		VertexShader = compile vs_3_0 VS_PostProcess();
		PixelShader  = compile ps_3_0 PS_Colors();

		// Plain overwrite: blending, alpha test, fog and sRGB write are off.
		AlphaBlendEnable         = FALSE;
		SEPARATEALPHABLENDENABLE = FALSE;
		ALPHATESTENABLE          = FALSE;
		FogEnable                = FALSE;
		SRGBWRITEENABLE          = FALSE;
	}
}

// Stage 2 when no depth of field variant is enabled: lighting effects,
// optionally followed by the anamorphic flare.
technique PostProcess2
{
	pass P0
	{
		VertexShader = compile vs_3_0 VS_PostProcess();
		PixelShader  = compile ps_3_0 PS_Lighting();

		// Plain overwrite: blending, alpha test, fog and sRGB write are off.
		AlphaBlendEnable         = FALSE;
		SEPARATEALPHABLENDENABLE = FALSE;
		ALPHATESTENABLE          = FALSE;
		FogEnable                = FALSE;
		SRGBWRITEENABLE          = FALSE;
	}
	#if (USE_ANAMFLARE == 1)
	// Flare is added on top of P0 (One / One blending).
	pass p1
	{
		AlphaBlendEnable = true;
		SrcBlend         = One;
		DestBlend        = One;
		PixelShader = compile ps_3_0 PS_Anamorphic(fFlareAxis);
	}
	#endif
}

// Stage 3 when no depth of field variant is enabled: sharpening and grain.
technique PostProcess3
{
	pass P0
	{
		VertexShader = compile vs_3_0 VS_PostProcess();
		PixelShader  = compile ps_3_0 PS_Image();

		// Plain overwrite: blending, alpha test, fog and sRGB write are off.
		AlphaBlendEnable         = FALSE;
		SEPARATEALPHABLENDENABLE = FALSE;
		ALPHATESTENABLE          = FALSE;
		FogEnable                = FALSE;
		SRGBWRITEENABLE          = FALSE;
	}
}

// Stage 4 when no depth of field variant is enabled: overlay effects.
technique PostProcess4
{
	pass P0
	{
		VertexShader = compile vs_3_0 VS_PostProcess();
		PixelShader  = compile ps_3_0 PS_Overlay();

		// Plain overwrite: blending, alpha test, fog and sRGB write are off.
		AlphaBlendEnable         = FALSE;
		SEPARATEALPHABLENDENABLE = FALSE;
		ALPHATESTENABLE          = FALSE;
		FogEnable                = FALSE;
		SRGBWRITEENABLE          = FALSE;
	}
}
#endif

//could've done this more effectively and shorter but screw it.

#if (USE_PETKAGTADOF == 1)
// USE_PETKAGTADOF variant: VS_PostProcess + PS_ProcessDoFBokeh, fixed-function extras off.
technique PostProcess
{
	pass P0
	{
		VertexShader = compile vs_3_0 VS_PostProcess();
		PixelShader  = compile ps_3_0 PS_ProcessDoFBokeh();

		AlphaBlendEnable         = FALSE;
		SeparateAlphaBlendEnable = FALSE;
		AlphaTestEnable          = FALSE;
		FogEnable                = FALSE;
		SRGBWriteEnable          = FALSE;
	}
}

// USE_PETKAGTADOF variant: VS_PostProcess + PS_Colors, fixed-function extras off.
technique PostProcess2
{
	pass P0
	{
		VertexShader = compile vs_3_0 VS_PostProcess();
		PixelShader  = compile ps_3_0 PS_Colors();

		AlphaBlendEnable         = FALSE;
		SeparateAlphaBlendEnable = FALSE;
		AlphaTestEnable          = FALSE;
		FogEnable                = FALSE;
		SRGBWriteEnable          = FALSE;
	}
}

// USE_PETKAGTADOF variant: PS_Lighting pass, plus an optional PS_Anamorphic pass (USE_ANAMFLARE == 1).
technique PostProcess3
{
	pass P0
	{
		VertexShader = compile vs_3_0 VS_PostProcess();
		PixelShader  = compile ps_3_0 PS_Lighting();

		AlphaBlendEnable         = FALSE;
		SeparateAlphaBlendEnable = FALSE;
		AlphaTestEnable          = FALSE;
		FogEnable                = FALSE;
		SRGBWriteEnable          = FALSE;
	}

#if (USE_ANAMFLARE == 1)
	// Blending enabled with SrcBlend = DestBlend = One; this pass assigns no VertexShader.
	pass p1
	{
		PixelShader = compile ps_3_0 PS_Anamorphic(fFlareAxis);

		AlphaBlendEnable = true;
		SrcBlend         = One;
		DestBlend        = One;
	}
#endif
}

// USE_PETKAGTADOF variant: VS_PostProcess + PS_Image, fixed-function extras off.
technique PostProcess4
{
	pass P0
	{
		VertexShader = compile vs_3_0 VS_PostProcess();
		PixelShader  = compile ps_3_0 PS_Image();

		AlphaBlendEnable         = FALSE;
		SeparateAlphaBlendEnable = FALSE;
		AlphaTestEnable          = FALSE;
		FogEnable                = FALSE;
		SRGBWriteEnable          = FALSE;
	}
}

// USE_PETKAGTADOF variant: VS_PostProcess + PS_Overlay, fixed-function extras off.
technique PostProcess5
{
	pass P0
	{
		VertexShader = compile vs_3_0 VS_PostProcess();
		PixelShader  = compile ps_3_0 PS_Overlay();

		AlphaBlendEnable         = FALSE;
		SeparateAlphaBlendEnable = FALSE;
		AlphaTestEnable          = FALSE;
		FogEnable                = FALSE;
		SRGBWriteEnable          = FALSE;
	}
}
#endif

#if (USE_MATSODOF == 1)

// USE_MATSODOF variant: PS_ProcessPass_FastDoF compiled with FIRST_PASS.
technique PostProcess
{
	pass P0
	{
		VertexShader = compile vs_3_0 VS_PostProcess();
		PixelShader  = compile ps_3_0 PS_ProcessPass_FastDoF(FIRST_PASS);

		// Depth, stencil and culling
		ZEnable       = FALSE;
		StencilEnable = FALSE;
		CullMode      = NONE;

		// Blending and alpha test
		AlphaBlendEnable         = FALSE;
		SeparateAlphaBlendEnable = FALSE;
		AlphaTestEnable          = FALSE;

		// Remaining output state
		DitherEnable    = FALSE;
		FogEnable       = FALSE;
		SRGBWriteEnable = FALSE;
	}
}

// USE_MATSODOF variant: PS_ProcessPass_FastDoF compiled with SECOND_PASS.
technique PostProcess2
{
	pass P0
	{
		VertexShader = compile vs_3_0 VS_PostProcess();
		PixelShader  = compile ps_3_0 PS_ProcessPass_FastDoF(SECOND_PASS);

		// Depth, stencil and culling
		ZEnable       = FALSE;
		StencilEnable = FALSE;
		CullMode      = NONE;

		// Blending and alpha test
		AlphaBlendEnable         = FALSE;
		SeparateAlphaBlendEnable = FALSE;
		AlphaTestEnable          = FALSE;

		// Remaining output state
		DitherEnable    = FALSE;
		FogEnable       = FALSE;
		SRGBWriteEnable = FALSE;
	}
}

// USE_MATSODOF variant: PS_ProcessPass_FastDoF compiled with THIRD_PASS.
technique PostProcess3
{
	pass P0
	{
		VertexShader = compile vs_3_0 VS_PostProcess();
		PixelShader  = compile ps_3_0 PS_ProcessPass_FastDoF(THIRD_PASS);

		// Depth, stencil and culling
		ZEnable       = FALSE;
		StencilEnable = FALSE;
		CullMode      = NONE;

		// Blending and alpha test
		AlphaBlendEnable         = FALSE;
		SeparateAlphaBlendEnable = FALSE;
		AlphaTestEnable          = FALSE;

		// Remaining output state
		DitherEnable    = FALSE;
		FogEnable       = FALSE;
		SRGBWriteEnable = FALSE;
	}
}

// USE_MATSODOF variant: PS_ProcessPass_FastDoF compiled with FOURTH_PASS.
technique PostProcess4
{
	pass P0
	{
		VertexShader = compile vs_3_0 VS_PostProcess();
		PixelShader  = compile ps_3_0 PS_ProcessPass_FastDoF(FOURTH_PASS);

		// Depth, stencil and culling
		ZEnable       = FALSE;
		StencilEnable = FALSE;
		CullMode      = NONE;

		// Blending and alpha test
		AlphaBlendEnable         = FALSE;
		SeparateAlphaBlendEnable = FALSE;
		AlphaTestEnable          = FALSE;

		// Remaining output state
		DitherEnable    = FALSE;
		FogEnable       = FALSE;
		SRGBWriteEnable = FALSE;
	}
}

// USE_MATSODOF variant: VS_PostProcess + PS_Colors, fixed-function extras off.
technique PostProcess5
{
	pass P0
	{
		VertexShader = compile vs_3_0 VS_PostProcess();
		PixelShader  = compile ps_3_0 PS_Colors();

		AlphaBlendEnable         = FALSE;
		SeparateAlphaBlendEnable = FALSE;
		AlphaTestEnable          = FALSE;
		FogEnable                = FALSE;
		SRGBWriteEnable          = FALSE;
	}
}

// USE_MATSODOF variant: PS_Lighting pass, plus an optional PS_Anamorphic pass (USE_ANAMFLARE == 1).
technique PostProcess6
{
	pass P0
	{
		VertexShader = compile vs_3_0 VS_PostProcess();
		PixelShader  = compile ps_3_0 PS_Lighting();

		AlphaBlendEnable         = FALSE;
		SeparateAlphaBlendEnable = FALSE;
		AlphaTestEnable          = FALSE;
		FogEnable                = FALSE;
		SRGBWriteEnable          = FALSE;
	}

#if (USE_ANAMFLARE == 1)
	// Blending enabled with SrcBlend = DestBlend = One; this pass assigns no VertexShader.
	pass p1
	{
		PixelShader = compile ps_3_0 PS_Anamorphic(fFlareAxis);

		AlphaBlendEnable = true;
		SrcBlend         = One;
		DestBlend        = One;
	}
#endif
}

// USE_MATSODOF variant: VS_PostProcess + PS_Image, fixed-function extras off.
technique PostProcess7
{
	pass P0
	{
		VertexShader = compile vs_3_0 VS_PostProcess();
		PixelShader  = compile ps_3_0 PS_Image();

		AlphaBlendEnable         = FALSE;
		SeparateAlphaBlendEnable = FALSE;
		AlphaTestEnable          = FALSE;
		FogEnable                = FALSE;
		SRGBWriteEnable          = FALSE;
	}
}

// USE_MATSODOF variant: VS_PostProcess + PS_Overlay, fixed-function extras off.
technique PostProcess8
{
	pass P0
	{
		VertexShader = compile vs_3_0 VS_PostProcess();
		PixelShader  = compile ps_3_0 PS_Overlay();

		AlphaBlendEnable         = FALSE;
		SeparateAlphaBlendEnable = FALSE;
		AlphaTestEnable          = FALSE;
		FogEnable                = FALSE;
		SRGBWriteEnable          = FALSE;
	}
}
			
#endif

#if (USE_GP65CJ042DOF == 1)
// USE_GP65CJ042DOF variant: VS_PostProcess + PS_GPDOFFocus.
technique PostProcess
{
	pass P0
	{
		VertexShader = compile vs_3_0 VS_PostProcess();
		PixelShader  = compile ps_3_0 PS_GPDOFFocus();

		// Depth, stencil and culling
		ZEnable       = FALSE;
		StencilEnable = FALSE;
		CullMode      = NONE;

		// Blending and alpha test
		AlphaBlendEnable         = FALSE;
		SeparateAlphaBlendEnable = FALSE;
		AlphaTestEnable          = FALSE;

		// Remaining output state
		DitherEnable    = FALSE;
		FogEnable       = FALSE;
		SRGBWriteEnable = FALSE;
	}
}


// USE_GP65CJ042DOF variant: VS_PostProcess + PS_GPDOFBokehblur.
technique PostProcess2
{
	pass P0
	{
		VertexShader = compile vs_3_0 VS_PostProcess();
		PixelShader  = compile ps_3_0 PS_GPDOFBokehblur();

		// Depth, stencil and culling
		ZEnable       = FALSE;
		StencilEnable = FALSE;
		CullMode      = NONE;

		// Blending and alpha test
		AlphaBlendEnable         = FALSE;
		SeparateAlphaBlendEnable = FALSE;
		AlphaTestEnable          = FALSE;

		// Remaining output state
		DitherEnable    = FALSE;
		FogEnable       = FALSE;
		SRGBWriteEnable = FALSE;
	}
}


// USE_GP65CJ042DOF variant: VS_PostProcess + PS_GPDOFGaussianH.
technique PostProcess3
{
	pass P0
	{
		VertexShader = compile vs_3_0 VS_PostProcess();
		PixelShader  = compile ps_3_0 PS_GPDOFGaussianH();

		// Depth, stencil and culling
		ZEnable       = FALSE;
		StencilEnable = FALSE;
		CullMode      = NONE;

		// Blending and alpha test
		AlphaBlendEnable         = FALSE;
		SeparateAlphaBlendEnable = FALSE;
		AlphaTestEnable          = FALSE;

		// Remaining output state
		DitherEnable    = FALSE;
		FogEnable       = FALSE;
		SRGBWriteEnable = FALSE;
	}
}

// USE_GP65CJ042DOF variant: VS_PostProcess + PS_GPDOFGaussianV.
technique PostProcess4
{
	pass P0
	{
		VertexShader = compile vs_3_0 VS_PostProcess();
		PixelShader  = compile ps_3_0 PS_GPDOFGaussianV();

		// Depth, stencil and culling
		ZEnable       = FALSE;
		StencilEnable = FALSE;
		CullMode      = NONE;

		// Blending and alpha test
		AlphaBlendEnable         = FALSE;
		SeparateAlphaBlendEnable = FALSE;
		AlphaTestEnable          = FALSE;

		// Remaining output state
		DitherEnable    = FALSE;
		FogEnable       = FALSE;
		SRGBWriteEnable = FALSE;
	}
}

// USE_GP65CJ042DOF variant: VS_PostProcess + PS_Colors, fixed-function extras off.
technique PostProcess5
{
	pass P0
	{
		VertexShader = compile vs_3_0 VS_PostProcess();
		PixelShader  = compile ps_3_0 PS_Colors();

		AlphaBlendEnable         = FALSE;
		SeparateAlphaBlendEnable = FALSE;
		AlphaTestEnable          = FALSE;
		FogEnable                = FALSE;
		SRGBWriteEnable          = FALSE;
	}
}

// USE_GP65CJ042DOF variant: PS_Lighting pass, plus an optional PS_Anamorphic pass (USE_ANAMFLARE == 1).
technique PostProcess6
{
	pass P0
	{
		VertexShader = compile vs_3_0 VS_PostProcess();
		PixelShader  = compile ps_3_0 PS_Lighting();

		AlphaBlendEnable         = FALSE;
		SeparateAlphaBlendEnable = FALSE;
		AlphaTestEnable          = FALSE;
		FogEnable                = FALSE;
		SRGBWriteEnable          = FALSE;
	}

#if (USE_ANAMFLARE == 1)
	// Blending enabled with SrcBlend = DestBlend = One; this pass assigns no VertexShader.
	pass p1
	{
		PixelShader = compile ps_3_0 PS_Anamorphic(fFlareAxis);

		AlphaBlendEnable = true;
		SrcBlend         = One;
		DestBlend        = One;
	}
#endif
}

// USE_GP65CJ042DOF variant: VS_PostProcess + PS_Image, fixed-function extras off.
technique PostProcess7
{
	pass P0
	{
		VertexShader = compile vs_3_0 VS_PostProcess();
		PixelShader  = compile ps_3_0 PS_Image();

		AlphaBlendEnable         = FALSE;
		SeparateAlphaBlendEnable = FALSE;
		AlphaTestEnable          = FALSE;
		FogEnable                = FALSE;
		SRGBWriteEnable          = FALSE;
	}
}

// USE_GP65CJ042DOF variant: VS_PostProcess + PS_Overlay, fixed-function extras off.
technique PostProcess8
{
	pass P0
	{
		VertexShader = compile vs_3_0 VS_PostProcess();
		PixelShader  = compile ps_3_0 PS_Overlay();

		AlphaBlendEnable         = FALSE;
		SeparateAlphaBlendEnable = FALSE;
		AlphaTestEnable          = FALSE;
		FogEnable                = FALSE;
		SRGBWriteEnable          = FALSE;
	}
}
#endif


/* //++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 


#include "Common.cfi"
#include "ShadeLib.cfi"
#include "PostEffectsLib.cfi"

// Shader global descriptions
// The Script annotation string lists the flags for this effect file:
// NoPreview, LocalConstants, ShaderDrawType = Custom, ShaderType = PostProcess.
float Script : STANDARDSGLOBAL
<
  string Script =
           "NoPreview;"
           "LocalConstants;"
           "ShaderDrawType = Custom;"
           "ShaderType = PostProcess;"
>; 

// Sampler bound to textures/defaults/glitter_color.dds: linear min/mag/mip filtering,
// wrap addressing on both U and V.
sampler2D rainbowSampler = sampler_state
{
  Texture = textures/defaults/glitter_color.dds;
  MinFilter = LINEAR;  
  MagFilter = LINEAR;
  MipFilter = LINEAR; 
  AddressU = Wrap;
  AddressV = Wrap;
};

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Texture To Texture technique /////////////////////////////////////////////////////////////////////////

/// Specific data ////////////////////////

/// Constants ////////////////////////////

// UV offsets added to the base coordinate by TexToTexVS:
// params0.xy / params0.zw feed baseTC1 / baseTC2, params1.xy / params1.zw feed baseTC3 / baseTC4.
float4 texToTexParams0;
float4 texToTexParams1;

////////////////// samplers /////////////////////

///////////////// vertex shader //////////////////

// Output of TexToTexVS: clip-space position plus five texture coordinates.
// TexToTexVS writes only .xy of each coordinate; .zw stay at the zero initialiser.
struct vtxOutTexToTex
{
  float4 HPosition  : POSITION;
  float4 baseTC0 : TEXCOORDN;    // unshifted coordinate
  float4 baseTC1 : TEXCOORDN;    // + texToTexParams0.xy
  float4 baseTC2 : TEXCOORDN;    // + texToTexParams0.zw
  float4 baseTC3 : TEXCOORDN;    // + texToTexParams1.xy
  float4 baseTC4 : TEXCOORDN;    // + texToTexParams1.zw
};

// Vertex shader shared by the TextureToTexture* techniques.
// Transforms the position by vpMatrix and emits the base UV together with four copies
// shifted by the offsets packed in texToTexParams0 / texToTexParams1.
vtxOutTexToTex TexToTexVS(vtxIn IN)
{
  vtxOutTexToTex OUT = (vtxOutTexToTex)0;

  float4 position = IN.Position;
  OUT.HPosition = mul(vpMatrix, position);

  const float2 uv = IN.baseTC.xy;

  // Tap 0 is unshifted; taps 1-4 each add one packed offset pair.
  OUT.baseTC0.xy = uv;
  OUT.baseTC1.xy = uv + texToTexParams0.xy;
  OUT.baseTC2.xy = uv + texToTexParams0.zw;
  OUT.baseTC3.xy = uv + texToTexParams1.xy;
  OUT.baseTC4.xy = uv + texToTexParams1.zw;

  return OUT;
}

///////////////// pixel shader //////////////////
// Plain copy: returns the _tex0 sample at the unshifted coordinate.
pixout TexToTexPS(vtxOutTexToTex IN)
{
  pixout result;

  result.Color = tex2D(_tex0, IN.baseTC0.xy);

  return result;
}

// Resampling copy (rotated-grid sampling, fewer artifacts; used for image rescaling).
// Returns the average of the five _tex0 taps prepared by TexToTexVS.
pixout TexToTexSampledPS(vtxOutTexToTex IN)
{
  pixout OUT;

  // Accumulate the taps in order 0..4, each converted to half4 first.
  half4 tapSum = (half4)tex2D(_tex0, IN.baseTC0.xy);
  tapSum = tapSum + (half4)tex2D(_tex0, IN.baseTC1.xy);
  tapSum = tapSum + (half4)tex2D(_tex0, IN.baseTC2.xy);
  tapSum = tapSum + (half4)tex2D(_tex0, IN.baseTC3.xy);
  tapSum = tapSum + (half4)tex2D(_tex0, IN.baseTC4.xy);

  OUT.Color = tapSum * 0.2f;

  return OUT;
}


// Resampling copy for the SSAO z-target.
// Takes the maximum of the centre sample and the red channel of the four shifted taps
// (max is used to prevent artifacts; averaging the five taps is the alternative).
pixout TexToTexSampledAOPS(vtxOutTexToTex IN)
{
  pixout OUT;

  half4 centre = tex2D(_tex0, IN.baseTC0.xy);
  half4 tapA = tex2D(_tex0, IN.baseTC1.xy);
  half4 tapB = tex2D(_tex0, IN.baseTC2.xy);
  half4 tapC = tex2D(_tex0, IN.baseTC3.xy);
  half4 tapD = tex2D(_tex0, IN.baseTC4.xy);

  // Scalar maximum over the red channel of the four neighbours (pairs 1/3 and 2/4).
  half pairAC = max(tapA.r, tapC.r);
  half pairBD = max(tapB.r, tapD.r);

  // The scalar is compared against every channel of the centre sample.
  OUT.Color = max(centre, max(pairAC, pairBD));

  return OUT;
}

////////////////// technique /////////////////////

// TexToTexVS + TexToTexSampledAOPS (max-based resample), culling disabled.
technique TextureToTextureResampledAO
{
  pass p0
  {
    VertexShader = CompileVS TexToTexVS();            
    PixelShader = CompilePS TexToTexSampledAOPS();
    CullMode = None;        
  }
}

// TexToTexVS + TexToTexPS (single-sample copy), culling disabled.
technique TextureToTexture
{
  pass p0
  {
    VertexShader = CompileVS TexToTexVS();            
    PixelShader = CompilePS TexToTexPS();
    CullMode = None;        
  }
}

// TexToTexVS + TexToTexSampledPS (five-tap average), culling disabled.
technique TextureToTextureResampled
{
  pass p0
  {
    VertexShader = CompileVS TexToTexVS();            
    PixelShader = CompilePS TexToTexSampledPS();
    CullMode = None;        
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Post-Process Anti Aliasing technique ///////////////////////////////////////////////////////////
///	FXAA technique and original code is copyright (C) NVIDIA by Timothy Lottes

/// Specific data ////////////////////////
// fxaaParams0 (read by FXAA_PS): x = absolute early-out threshold, y = early-out threshold
// relative to the local maximum luma, z = scale applied to the sub-pixel term; w is not read
// by the shaders in this section.
static const float4 fxaaParams0 = {0.08f, 0.16f, 0.75f, 0.25f};
// fxaaParams1 (read by FXAAFast_PS): x = scale for the smallest direction component,
// y = absolute early-out threshold, z = early-out threshold relative to the maximum corner luma;
// w is not read by the shaders in this section.
static const float4 fxaaParams1 = {4.f, 0.05f, 0.125f, 0.0f}; 

/// Constants ////////////////////////////

/// Samplers ////////////////////////////

///////////////// vertex shader //////////////////

// Output of FXAA_VS.
struct vtxOutFXAA
{
  float4 HPosition  : POSITION;
  // xy = unshifted texture coordinate (zw are left at zero by FXAA_VS)
  float4 baseTC     : TEXCOORD0;
  // xy = coordinate - 0.5 * g_VS_ScreenSize.zw, zw = coordinate + 0.5 * g_VS_ScreenSize.zw
  float4 baseTC1	: TEXCOORD1;
};

// Vertex shader for both FXAA techniques.
// Emits the unshifted UV in baseTC.xy and two diagonally shifted copies in baseTC1:
// xy = UV - 0.5 * g_VS_ScreenSize.zw, zw = UV + 0.5 * g_VS_ScreenSize.zw.
vtxOutFXAA FXAA_VS(vtxIn IN)
{
  vtxOutFXAA OUT = (vtxOutFXAA)0;

  float4 position = IN.Position;
  OUT.HPosition = mul(vpMatrix, position);

  OUT.baseTC.xy = IN.baseTC.xy;

  // Negative-shifted corner goes to xy, positive-shifted corner to zw.
  OUT.baseTC1.xy = IN.baseTC.xy - 0.5 * g_VS_ScreenSize.zw;
  OUT.baseTC1.zw = IN.baseTC.xy + 0.5 * g_VS_ScreenSize.zw;

  return OUT;
}

///////////////// pixel shader //////////////////

// Full FXAA pixel shader.
// Luma is read from the alpha channel (.w) of _tex0. The shader:
//   1. samples the centre and its 4 direct neighbours and returns the centre colour unchanged
//      when the local luma range is below max(fxaaParams0.x, fMaxRange * fxaaParams0.y);
//   2. samples the 4 diagonal neighbours and decides whether the edge span is horizontal;
//   3. walks along the edge in both directions (one step before the loop, then up to 11 more
//      with growing step sizes) until the luma delta reaches a quarter of the local gradient;
//   4. shifts the texture coordinate perpendicular to the edge by the larger of the span-based
//      offset and the sub-pixel term, and returns _tex0 sampled there.
// All fetches use tex2Dlod with LOD 0.
pixout FXAA_PS(vtxOutFXAA IN)
{
  pixout OUT = (pixout)0;

	// Pixel sizes.
	float2 vPixelSizes = PS_ScreenSize.zw * 2.0;
	
	// Initial sample. Used on early-out.
	float4 cSampleCenter = tex2Dlod(_tex0, float4(IN.baseTC.xy,0,0));
	OUT.Color = cSampleCenter;

	// Luma of the centre and of the four direct neighbours.
	float fLumCenter = cSampleCenter.w;
	float fLumBottom = tex2Dlod(_tex0, float4(IN.baseTC.xy + float2( 0, 1) * vPixelSizes.xy,0,0)).w;
	float fLumRight  = tex2Dlod(_tex0, float4(IN.baseTC.xy + float2( 1, 0) * vPixelSizes.xy,0,0)).w;
	float fLumTop    = tex2Dlod(_tex0, float4(IN.baseTC.xy + float2( 0,-1) * vPixelSizes.xy,0,0)).w;
	float fLumLeft   = tex2Dlod(_tex0, float4(IN.baseTC.xy + float2(-1, 0) * vPixelSizes.xy,0,0)).w;

    float fMaxRange = max(max(fLumTop, fLumLeft), max(fLumRight, max(fLumBottom, fLumCenter)));
    float fMinRange = min(min(fLumTop, fLumLeft), min(fLumRight, min(fLumBottom, fLumCenter)));
    float fRange = fMaxRange - fMinRange;
    
    // Early out.
    if(fRange < max(fxaaParams0.x, fMaxRange * fxaaParams0.y))
		return OUT;
		
	// Luma of the four diagonal neighbours.
	float fLumTopLeft     = tex2Dlod(_tex0, float4(IN.baseTC.xy + float2(-1,-1) * vPixelSizes.xy,0,0)).w;
	float fLumBottomRight = tex2Dlod(_tex0, float4(IN.baseTC.xy + float2( 1, 1) * vPixelSizes.xy,0,0)).w;
	float fLumTopRight	  = tex2Dlod(_tex0, float4(IN.baseTC.xy + float2( 1,-1) * vPixelSizes.xy,0,0)).w;
	float fLumBottomLeft  = tex2Dlod(_tex0, float4(IN.baseTC.xy + float2(-1, 1) * vPixelSizes.xy,0,0)).w;
    
    float fLumTopBottom = fLumTop  + fLumBottom;
    float fLumLeftRight = fLumLeft + fLumRight;
    float fLumSubPixel = fLumTopBottom + fLumLeftRight;
    
    // Second-difference edge terms for the centre row/column ...
    float fEdgeH1 = (-2.0 * fLumCenter) + fLumTopBottom;
    float fEdgeV1 = (-2.0 * fLumCenter) + fLumLeftRight;

    // ... for the right column / top row ...
    float fLumTopBottomRight = fLumTopRight + fLumBottomRight;
    float fLumTopLeftRight = fLumTopLeft + fLumTopRight;
    float fEdgeH2 = (-2.0 * fLumRight) + fLumTopBottomRight;
    float fEdgeV2 = (-2.0 * fLumTop) + fLumTopLeftRight;

    // ... and for the left column / bottom row; the centre terms are weighted twice.
    float fLumTopBottomLeft = fLumTopLeft + fLumBottomLeft;
    float fLumBottomLeftRight = fLumBottomLeft + fLumBottomRight;
    float fEdgeH4 = (abs(fEdgeH1) * 2.0) + abs(fEdgeH2);
    float fEdgeV4 = (abs(fEdgeV1) * 2.0) + abs(fEdgeV2);
    float fEdgeH3 = (-2.0 * fLumLeft) + fLumTopBottomLeft;
    float fEdgeV3 = (-2.0 * fLumBottom) + fLumBottomLeftRight;
    float fEdgeH = abs(fEdgeH3) + fEdgeH4;
    float fEdgeV = abs(fEdgeV3) + fEdgeV4;

    float fBlendSubPixel = fLumTopBottomLeft + fLumTopBottomRight; 
    float fLengthSign = vPixelSizes.x;
    bool bHorizontalSpan = fEdgeH >= fEdgeV;
    float fSubPixelA = fLumSubPixel * 2.0 + fBlendSubPixel; 

    // For a vertical span, reuse the Top/Bottom variables for the left/right neighbours.
    if(!bHorizontalSpan) fLumTop = fLumLeft; 
    if(!bHorizontalSpan) fLumBottom = fLumRight;
    if(bHorizontalSpan) fLengthSign = vPixelSizes.y;
    // Weighted 3x3 neighbourhood average (weights sum to 12) minus the centre luma.
    float fSubPixelB = (fSubPixelA * (1.0/12.0)) - fLumCenter;	
        
    float fGradientN = fLumTop - fLumCenter;
    float fGradientS = fLumBottom - fLumCenter;
    float fLumTopCenter = fLumTop + fLumCenter;
    float fLumBottomCenter = fLumBottom + fLumCenter;
    // Pick the side with the steeper gradient; the offset sign is flipped when it is the N side.
    bool fPairN = abs(fGradientN) >= abs(fGradientS);
    float fGradient = max(abs(fGradientN), abs(fGradientS));
    if(fPairN) fLengthSign = -fLengthSign;
    float fSubPixelC = saturate(abs(fSubPixelB) * (1.0 / fRange));
    
    // Search start position: half a pixel across the edge from the centre.
    float2 vPositionB;
    vPositionB.x = IN.baseTC.x;
    vPositionB.y = IN.baseTC.y;
    float2 vOffsetNP;
    vOffsetNP.x = (!bHorizontalSpan) ? 0.0 : vPixelSizes.x;
    vOffsetNP.y = ( bHorizontalSpan) ? 0.0 : vPixelSizes.y;
    if(!bHorizontalSpan) vPositionB.x += fLengthSign * 0.5;
    if( bHorizontalSpan) vPositionB.y += fLengthSign * 0.5;
    
    float2 vPositionN;
    vPositionN.x = vPositionB.x - vOffsetNP.x;
    vPositionN.y = vPositionB.y - vOffsetNP.y;
    
    float2 vPositionP;
    vPositionP.x = vPositionB.x + vOffsetNP.x;
    vPositionP.y = vPositionB.y + vOffsetNP.y;
    
    float fSubPixelD = ((-2.0)*fSubPixelC) + 3.0;
    float fLumEndN = tex2Dlod(_tex0, float4(vPositionN,0,0)).w;
    float fLumEndP = tex2Dlod(_tex0, float4(vPositionP,0,0)).w;
    
    float fSubPixelE = (fSubPixelC * fSubPixelC);
    
    if(!fPairN) fLumTopCenter = fLumBottomCenter;
    float fGradientScaled = fGradient * 1.0/4.0;
    // D * E = (3 - 2c) * c^2 with c = fSubPixelC.
    float fSubPixelF = fSubPixelD * fSubPixelE;
    bool bLumZero = (fLumCenter - fLumTopCenter * 0.5) < 0.0;
    
    // First search step in both directions.
    fLumEndN -= fLumTopCenter * 0.5;
    fLumEndP -= fLumTopCenter * 0.5;
    bool bDoneN = abs(fLumEndN) >= fGradientScaled;
    bool bDoneP = abs(fLumEndP) >= fGradientScaled;
    if(!bDoneN) vPositionN.x -= vOffsetNP.x;
    if(!bDoneN) vPositionN.y -= vOffsetNP.y;
    // bDoneNP is assigned here and in the loop but never read in this function.
    bool bDoneNP = (!bDoneN) || (!bDoneP);
    if(!bDoneP) vPositionP.x += vOffsetNP.x;
    if(!bDoneP) vPositionP.y += vOffsetNP.y;
    
    // Step multipliers for the 11 loop iterations below.
    static const half fSearchScale[11] = {1.0, 1.0, 1.0, 1.0, 1.5, 2.0, 2.0, 2.0, 2.0, 4.0, 8.0};

    #if D3D10
    [unroll]
    #endif
    // Search edges.
    for(int i = 0; i < 11; i++)
    {
        if(!bDoneN) fLumEndN = tex2Dlod(_tex0, float4(vPositionN.xy,0,0)).w;
        if(!bDoneP) fLumEndP = tex2Dlod(_tex0, float4(vPositionP.xy,0,0)).w;
        if(!bDoneN) fLumEndN = fLumEndN - fLumTopCenter * 0.5;
        if(!bDoneP) fLumEndP = fLumEndP - fLumTopCenter * 0.5;
        bDoneN = abs(fLumEndN) >= fGradientScaled;
        bDoneP = abs(fLumEndP) >= fGradientScaled;
        if(!bDoneN) vPositionN.x -= vOffsetNP.x * fSearchScale[i];
        if(!bDoneN) vPositionN.y -= vOffsetNP.y * fSearchScale[i];
        bDoneNP = (!bDoneN) || (!bDoneP);
        if(!bDoneP) vPositionP.x += vOffsetNP.x * fSearchScale[i];
        if(!bDoneP) vPositionP.y += vOffsetNP.y * fSearchScale[i];
    }
                    
    // Distances from the centre to both span ends, measured along the span axis.
    float fDestN = IN.baseTC.x - vPositionN.x;
    float fDestP = vPositionP.x - IN.baseTC.x;
    
    if(!bHorizontalSpan) fDestN = IN.baseTC.y - vPositionN.y;
    if(!bHorizontalSpan) fDestP = vPositionP.y - IN.baseTC.y;
    
    float fSpanLength = (fDestP + fDestN);
    bool bGoodSpanN = (fLumEndN < 0.0) != bLumZero;
    bool bGoodSpanP = (fLumEndP < 0.0) != bLumZero;

    // Use the nearer end of the span.
    bool bDirectionN = fDestN < fDestP;
    float fDest = min(fDestN, fDestP);
    bool bGoodSpan = bDirectionN ? bGoodSpanN : bGoodSpanP;
    float fSubPixelG = fSubPixelF * fSubPixelF;
    float fPixelOffset = (fDest * (-(1.0/fSpanLength))) + 0.5;
    float fSubPixelH = fSubPixelG * fxaaParams0.z;

    // Final offset: span-based offset (if the span is good) or the sub-pixel term, whichever is larger.
    float fPixelOffsetGood = bGoodSpan ? fPixelOffset : 0.0;
    float fPixelOffsetSubpix = max(fPixelOffsetGood, fSubPixelH);
    if(!bHorizontalSpan) IN.baseTC.x += fPixelOffsetSubpix * fLengthSign;
    if( bHorizontalSpan) IN.baseTC.y += fPixelOffsetSubpix * fLengthSign;
    
    OUT.Color = tex2Dlod(_tex0, float4(IN.baseTC.xy,0,0));
        
    return OUT;
}

///////////////// pixel shader //////////////////
// Fast FXAA pixel shader.
// Luma is read from the alpha channel (.w) of _tex0 at the four shifted corner coordinates
// prepared by FXAA_VS (baseTC1). The shader returns the centre sample unchanged when the local
// luma range is below max(fxaaParams1.y, fLumMax * fxaaParams1.z). Otherwise it derives a
// direction from the corner lumas and blends two taps along it at PS_ScreenSize.zw scale with
// two taps along a clamped direction at 4x that scale. The four-tap result is used unless its
// luma leaves the [fLumMin, fLumMax] range, in which case the two-tap result is returned.
// Every fetch uses tex2Dlod with LOD 0, so no fetch after the varying early return depends on
// screen-space derivatives.
pixout FXAAFast_PS(vtxOutFXAA IN)
{
  pixout OUT = (pixout)0;

  // xy = 4x step for the wide taps, zw = 1x step for the narrow taps.
  const half4 vPixelSizes = PS_ScreenSize.zwzw * half4(4.0, 4.0, 1.0, 1.0);

  // Initial sample. Used on early-out.
  float4 cSampleCenter = tex2Dlod(_tex0, half4(IN.baseTC.xy,0,0));
  OUT.Color = cSampleCenter;

  // vDir.x / vDir.z accumulate signed sums of the corner lumas; vDir.y stays zero.
  half4 vDir;
  vDir.y = 0.0;
  half4 fLumTopRight = tex2Dlod(_tex0, half4(IN.baseTC1.zy,0,0));
  // Small bias on one corner, applied before it enters the direction sums.
  fLumTopRight.w += half(1.0 / 384.0);
  vDir.x = -fLumTopRight.w;
  vDir.z = -fLumTopRight.w;

  half4 fLumBottomLeft = tex2Dlod(_tex0, half4(IN.baseTC1.xw,0,0));
  vDir.x += fLumBottomLeft.w;
  vDir.z += fLumBottomLeft.w;

  half4 fLumTopLeft = tex2Dlod(_tex0, half4(IN.baseTC1.xy,0,0));
  vDir.x -= fLumTopLeft.w;
  vDir.z += fLumTopLeft.w;

  half4 fLumBottomRight = tex2Dlod(_tex0, half4(IN.baseTC1.zw,0,0));
  vDir.x += fLumBottomRight.w;
  vDir.z -= fLumBottomRight.w;

  half fLumMin = min(min(fLumTopLeft.w, fLumBottomLeft.w), min(fLumTopRight.w, fLumBottomRight.w));
  half fLumMax = max(max(fLumTopLeft.w, fLumBottomLeft.w), max(fLumTopRight.w, fLumBottomRight.w));

  // Early out on low local contrast.
  if((max(fLumMax, cSampleCenter.w) - min(fLumMin, cSampleCenter.w)) < max(fxaaParams1.y, fLumMax * fxaaParams1.z))
	return OUT;

  // Unit direction (x and z of the normalised vector).
  half4 vDir1;
  vDir1.xy = normalize(vDir.xyz).xz;
  half fDirAbsMinTimesC = min(abs(vDir1.x), abs(vDir1.y)) * fxaaParams1.x;

  // Second direction: rescaled by the smallest component and clamped to [-2, 2].
  half4 vDir2;
  vDir2.xy = clamp(vDir1.xy / fDirAbsMinTimesC, -2.0h, 2.0h);
  vDir1.zw = IN.baseTC.xy;
  vDir2.zw = IN.baseTC.xy;

  // Narrow pair of taps along vDir1.
  half4 temp1N;
  temp1N.xy = vDir1.zw - vDir1.xy * vPixelSizes.zw;

  temp1N = tex2Dlod(_tex0, float4(temp1N.xy,0,0));
  half4 rgby1;
  rgby1.xy = vDir1.zw + vDir1.xy * vPixelSizes.zw;

  rgby1 = tex2Dlod(_tex0, float4(rgby1.xy,0,0));
  rgby1 = (temp1N + rgby1) * 0.5;

  // Wide pair of taps along vDir2.
  half4 temp2N;
  temp2N.xy = vDir2.zw - vDir2.xy * vPixelSizes.xy;
  temp2N = tex2Dlod(_tex0, float4(temp2N.xy,0,0));

  half4 rgby2;
  rgby2.xy = vDir2.zw + vDir2.xy * vPixelSizes.xy;
  // LOD-0 fetch like every other tap in this shader.
  rgby2 = tex2Dlod(_tex0, float4(rgby2.xy,0,0));
  rgby2 = (temp2N + rgby2) * 0.5;

  rgby2 = (rgby2 + rgby1) * 0.5;

  // Fall back to the two-tap result when the four-tap luma leaves the corner luma range.
  bool twoTapLt = rgby2.w < fLumMin;
  bool twoTapGt = rgby2.w > fLumMax;

  if(twoTapLt || twoTapGt) rgby2 = rgby1;

  OUT.Color = rgby2;

  return OUT;
}

////////////////// technique /////////////////////

// FXAA_VS + FXAA_PS (full edge-search variant), culling disabled.
technique FXAA
{
  pass p0
  {        
    CullMode = None;        
    VertexShader = CompileVS FXAA_VS();
	PixelShader = CompilePS FXAA_PS();
  }
}

// FXAA_VS + FXAAFast_PS (corner-tap variant), culling disabled.
technique FXAAFast
{ 
  pass p0
  {        
    CullMode = None;  
	VertexShader = CompileVS FXAA_VS();
	PixelShader = CompilePS FXAAFast_PS();
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Clear screen technique //////////////////////////////////////////////////////////////////////////

/// Specific data ////////////////////////

/// Constants ////////////////////////////

// Value written unchanged to the output colour by ClearScreenPS.
float4 clrScrParams;

/// Samplers ////////////////////////////
// none

///////////////// vertex shader //////////////////

// Output of ClearScreenVS: position only, no texture coordinates.
struct vtxOutClrScr
{
  float4 HPosition  : POSITION;
};

// Vertex shader for the ClearScreen technique: only transforms the position by vpMatrix.
vtxOutClrScr ClearScreenVS(vtxIn IN)
{
  vtxOutClrScr result = (vtxOutClrScr)0;

  result.HPosition = mul(vpMatrix, IN.Position);

  return result;
}

///////////////// pixel shader //////////////////
// Pixel shader for the ClearScreen technique: outputs the clrScrParams constant for every pixel.
pixout ClearScreenPS(vtxOutClrScr IN)
{
  pixout result;

  result.Color = clrScrParams;

  return result;
}

////////////////// technique /////////////////////
// ClearScreenVS + ClearScreenPS (constant colour fill), culling disabled.
technique ClearScreen
{
  pass p0
  {
    VertexShader = CompileVS ClearScreenVS();
    PixelShader = CompilePS ClearScreenPS();    
    CullMode = None;        
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Kawase Blur technique //////////////////////////////////////////////////////////////////////////

/// Specific data ////////////////////////

/// Constants ////////////////////////////

// UV offsets added to the base coordinate by KawaseBlurVS:
// params0.xy / params0.zw feed baseTC1 / baseTC2, params1.xy / params1.zw feed baseTC3 / baseTC4.
float4 blurParams0;
float4 blurParams1;

/// Samplers ////////////////////////////

// Source texture for KawaseBlurPS, bound to sampler register s0.
sampler2D blurMap0 : register(s0);

///////////////// vertex shader //////////////////

// Output of KawaseBlurVS: clip-space position plus the centre coordinate and four shifted taps.
struct vtxOutKawase
{
  float4 HPosition  : POSITION;
  float2 baseTC0 : TEXCOORDN;    // unshifted coordinate
  float2 baseTC1 : TEXCOORDN;    // + blurParams0.xy
  float2 baseTC2 : TEXCOORDN;    // + blurParams0.zw
  float2 baseTC3 : TEXCOORDN;    // + blurParams1.xy
  float2 baseTC4 : TEXCOORDN;    // + blurParams1.zw
};

// Vertex shader for the KawaseBlur technique.
// Transforms the position by vpMatrix and emits the centre UV plus four taps shifted by the
// offsets packed in blurParams0 / blurParams1.
vtxOutKawase KawaseBlurVS(vtxIn IN)
{
  vtxOutKawase OUT = (vtxOutKawase)0;

  float4 position = IN.Position;
  OUT.HPosition = mul(vpMatrix, position);

  const float2 uv = IN.baseTC.xy;

  // The middle pixel is sampled as well, to keep some detail.
  OUT.baseTC0.xy = uv;

  OUT.baseTC1.xy = uv + blurParams0.xy;
  OUT.baseTC2.xy = uv + blurParams0.zw;
  OUT.baseTC3.xy = uv + blurParams1.xy;
  OUT.baseTC4.xy = uv + blurParams1.zw;

  return OUT;
}

///////////////// pixel shader //////////////////
// Pixel shader for the KawaseBlur technique: unweighted average of the five blurMap0 taps
// prepared by KawaseBlurVS.
pixout KawaseBlurPS(vtxOutKawase IN)
{
  pixout OUT;

  // Accumulate the taps in order 0..4, each converted to half4 first.
  half4 tapSum = (half4)tex2D(blurMap0, IN.baseTC0.xy);
  tapSum = tapSum + (half4)tex2D(blurMap0, IN.baseTC1.xy);
  tapSum = tapSum + (half4)tex2D(blurMap0, IN.baseTC2.xy);
  tapSum = tapSum + (half4)tex2D(blurMap0, IN.baseTC3.xy);
  tapSum = tapSum + (half4)tex2D(blurMap0, IN.baseTC4.xy);

  OUT.Color = tapSum / 5.0;

  return OUT;
}

////////////////// technique /////////////////////
// KawaseBlurVS + KawaseBlurPS (five-tap average).
// NOTE(review): this technique sets CullMode = Back, whereas the TextureToTexture, FXAA and
// ClearScreen techniques set None - confirm this is intentional.
technique KawaseBlur
{
  pass p0
  {
    VertexShader = CompileVS KawaseBlurVS();
    PixelShader = CompilePS KawaseBlurPS();
    
    CullMode = Back;        
  }
}

// =================================================================================================
// Technique: GaussBlur/GaussBlurBilinear
// Description: Applies a separable vertical/horizontal gaussian blur filter
// =================================================================================================

// Per-tap UV offsets (.xy) and weights (.x) for the Gauss blur shaders.
// The shaders in this section read only elements 0..7 of each array.
float4 PI_psOffsets[16];
float4 psWeights[16];

// Output of GaussBlurBilinearVS.
// tcN.xy = base coordinate + PI_psOffsets[N].xy. Unless %_RT_SAMPLE0 is set, tcN.wz additionally
// carries a second coordinate with the offset scaled for the wider blur (x stored in w, y in z).
struct vtxOutGauss
{
  float4 HPosition : POSITION;
  float2 baseTC : TEXCOORDN;  
  float4 tc0 : TEXCOORDN;    
  float4 tc1 : TEXCOORDN;    
  float4 tc2 : TEXCOORDN;    
  float4 tc3 : TEXCOORDN;   
  float4 tc4 : TEXCOORDN;    
  float4 tc5 : TEXCOORDN;    
  float4 tc6 : TEXCOORDN;    
  float4 tc7 : TEXCOORDN;  
};

// Output of MaskedGaussBlurBilinearVS.
// tcN.xy = base coordinate + PI_psOffsets[N].xy. tc0.wz holds the unshifted coordinate and
// tc1.wz the aspect-corrected coordinate (x stored in w, y in z).
struct vtxOutGaussMasked
{
  float4 HPosition : POSITION;
  float4 tc0 : TEXCOORDN;    
  float4 tc1 : TEXCOORDN;    
  float2 tc2 : TEXCOORDN;    
  float2 tc3 : TEXCOORDN;    
  float2 tc4 : TEXCOORDN;    
  float2 tc5 : TEXCOORDN;      
  float2 tc6 : TEXCOORDN;    
  float2 tc7 : TEXCOORDN;    
};

// Vertex shader for the GaussBlurBilinear technique.
// Emits the unshifted UV plus eight taps shifted by PI_psOffsets[0..7].xy. Unless %_RT_SAMPLE0
// is set, it also emits a second set of eight taps whose offsets are multiplied by a
// screen-size dependent scale; those go into the .wz components (x in w, y in z).
vtxOutGauss GaussBlurBilinearVS(vtxIn IN)
{
  vtxOutGauss OUT = (vtxOutGauss) 0;

  float4 position = IN.Position;
  OUT.HPosition = mul(vpMatrix, position);

  const float2 uv = IN.baseTC.xy;
  OUT.baseTC.xy = uv;

  // First tap set.
  OUT.tc0.xy = uv + PI_psOffsets[0].xy;
  OUT.tc1.xy = uv + PI_psOffsets[1].xy;
  OUT.tc2.xy = uv + PI_psOffsets[2].xy;
  OUT.tc3.xy = uv + PI_psOffsets[3].xy;
  OUT.tc4.xy = uv + PI_psOffsets[4].xy;
  OUT.tc5.xy = uv + PI_psOffsets[5].xy;
  OUT.tc6.xy = uv + PI_psOffsets[6].xy;
  OUT.tc7.xy = uv + PI_psOffsets[7].xy;

  #if !%_RT_SAMPLE0
	  // Coordinates for the wider bloom blur.
	  half2 wideScale = 750.0f * ScrSize.zw * float2(0.75*(ScrSize.x/ScrSize.y), 1.0);

	  OUT.tc0.wz = uv + PI_psOffsets[0].xy * wideScale;
	  OUT.tc1.wz = uv + PI_psOffsets[1].xy * wideScale;
	  OUT.tc2.wz = uv + PI_psOffsets[2].xy * wideScale;
	  OUT.tc3.wz = uv + PI_psOffsets[3].xy * wideScale;
	  OUT.tc4.wz = uv + PI_psOffsets[4].xy * wideScale;
	  OUT.tc5.wz = uv + PI_psOffsets[5].xy * wideScale;
	  OUT.tc6.wz = uv + PI_psOffsets[6].xy * wideScale;
	  OUT.tc7.wz = uv + PI_psOffsets[7].xy * wideScale;
  #endif

  return OUT;
}

// Vertex shader for the MaskedGaussBlurBilinear and GaussBlurBilinearEncoded techniques.
// Emits eight taps shifted by PI_psOffsets[0..7].xy, the unshifted UV in tc0.wz and an
// aspect-corrected UV (scaled around 0.5) in tc1.wz; x is stored in w and y in z.
vtxOutGaussMasked MaskedGaussBlurBilinearVS(vtxIn IN)
{
  vtxOutGaussMasked OUT = (vtxOutGaussMasked) 0;

  float4 position = IN.Position;
  OUT.HPosition = mul(vpMatrix, position);

  const float2 uv = IN.baseTC.xy;

  OUT.tc0.xy = uv + PI_psOffsets[0].xy;
  OUT.tc1.xy = uv + PI_psOffsets[1].xy;
  OUT.tc2.xy = uv + PI_psOffsets[2].xy;
  OUT.tc3.xy = uv + PI_psOffsets[3].xy;
  OUT.tc4.xy = uv + PI_psOffsets[4].xy;
  OUT.tc5.xy = uv + PI_psOffsets[5].xy;
  OUT.tc6.xy = uv + PI_psOffsets[6].xy;
  OUT.tc7.xy = uv + PI_psOffsets[7].xy;

  // Extra coordinates in the wz components: plain UV and the aspect-corrected UV.
  const float2 aspectScale = float2(0.75*(ScrSize.x/ScrSize.y), 1.0);
  OUT.tc0.wz = uv;
  OUT.tc1.wz = (uv - 0.5) * aspectScale + 0.5;

  return OUT;
}

// Pixel shader for the GaussBlurBilinear technique.
// Returns the sum of eight _tex0 taps (tcN.xy) weighted by psWeights[0..7].x. Unless
// %_RT_SAMPLE0 is set, a second set of eight taps (tcN.wz, wider spacing) is added to rgb with
// the same weights and the rgb sum is halved; alpha then comes from the first tap set only.
// NOTE(review): a previous comment here said the alpha channel stays unblurred for the skin
// mask, but the centre sample fetched for that purpose was overwritten before it was ever used,
// so alpha has always been the weighted tap sum. The unused fetch has been dropped; the output
// is unchanged. Confirm whether unblurred alpha was actually intended.
pixout GaussBlurBilinearPS(vtxOutGauss IN)
{
  pixout OUT;

  // First tap set: narrow blur, all four channels.
  half4 col = tex2D(_tex0, IN.tc0.xy);
  half4 sum = col * (half) psWeights[0].x;

  col = tex2D(_tex0, IN.tc1.xy);
  sum += col * (half) psWeights[1].x;

  col = tex2D(_tex0, IN.tc2.xy);
  sum += col * (half) psWeights[2].x;

  col = tex2D(_tex0, IN.tc3.xy);
  sum += col * (half) psWeights[3].x;

  col = tex2D(_tex0, IN.tc4.xy);
  sum += col * (half) psWeights[4].x;

  col = tex2D(_tex0, IN.tc5.xy);
  sum += col * (half) psWeights[5].x;

  col = tex2D(_tex0, IN.tc6.xy);
  sum += col * (half) psWeights[6].x;

  col = tex2D(_tex0, IN.tc7.xy);
  sum += col * (half) psWeights[7].x;

  OUT.Color = sum;

  #if !%_RT_SAMPLE0
	  // Second tap set: wider blur for bloom, rgb only.
	  col = tex2D(_tex0, IN.tc0.wz);
	  sum.rgb += col.rgb * (half) psWeights[0].x;

	  col = tex2D(_tex0, IN.tc1.wz);
	  sum.rgb += col.rgb * (half) psWeights[1].x;

	  col = tex2D(_tex0, IN.tc2.wz);
	  sum.rgb += col.rgb * (half) psWeights[2].x;

	  col = tex2D(_tex0, IN.tc3.wz);
	  sum.rgb += col.rgb * (half) psWeights[3].x;

	  col = tex2D(_tex0, IN.tc4.wz);
	  sum.rgb += col.rgb * (half) psWeights[4].x;

	  col = tex2D(_tex0, IN.tc5.wz);
	  sum.rgb += col.rgb * (half) psWeights[5].x;

	  col = tex2D(_tex0, IN.tc6.wz);
	  sum.rgb += col.rgb * (half) psWeights[6].x;

	  col = tex2D(_tex0, IN.tc7.wz);
	  sum.rgb += col.rgb * (half) psWeights[7].x;

	  // Both tap sets were accumulated with the same weights, hence the factor 0.5.
	  OUT.Color.rgb = sum.rgb * 0.5;
  #endif

  return OUT;
}

// Pixel shader for the MaskedGaussBlurBilinear technique.
// Each of the eight _tex0 taps is first blended with the unshifted sample according to the
// mask read from _tex1.x (mask 0 keeps the unshifted sample, mask 1 keeps the tap), then
// accumulated with psWeights[0..7].x.
pixout MaskedGaussBlurBilinearPS(vtxOutGaussMasked IN)
{
  pixout OUT;

  half4 centre = tex2D(_tex0, IN.tc0.wz);
  half blend = tex2D(_tex1, IN.tc1.wz).x;

  half4 total = 0;
  half4 tap;

  tap = tex2D(_tex0, IN.tc0.xy);
  total += lerp(centre, tap, blend) * (half) psWeights[0].x;

  tap = tex2D(_tex0, IN.tc1.xy);
  total += lerp(centre, tap, blend) * (half) psWeights[1].x;

  tap = tex2D(_tex0, IN.tc2.xy);
  total += lerp(centre, tap, blend) * (half) psWeights[2].x;

  tap = tex2D(_tex0, IN.tc3.xy);
  total += lerp(centre, tap, blend) * (half) psWeights[3].x;

  tap = tex2D(_tex0, IN.tc4.xy);
  total += lerp(centre, tap, blend) * (half) psWeights[4].x;

  tap = tex2D(_tex0, IN.tc5.xy);
  total += lerp(centre, tap, blend) * (half) psWeights[5].x;

  tap = tex2D(_tex0, IN.tc6.xy);
  total += lerp(centre, tap, blend) * (half) psWeights[6].x;

  tap = tex2D(_tex0, IN.tc7.xy);
  total += lerp(centre, tap, blend) * (half) psWeights[7].x;

  OUT.Color = total;

  return OUT;
}

// Pixel shader for the GaussBlurBilinearEncoded technique.
// Decodes each of the eight _tex0 taps with DecodeRGBS, accumulates them with
// psWeights[0..7].x and re-encodes the sum with EncodeRGBS (fourth component set to 1).
pixout GaussBlurBilinearEncodedPS(vtxOutGaussMasked IN)
{
  pixout OUT;

  half3 total = 0;
  half3 decoded;

  decoded = DecodeRGBS( tex2D(_tex0, IN.tc0.xy) );
  total += decoded * (half) psWeights[0].x;

  decoded = DecodeRGBS( tex2D(_tex0, IN.tc1.xy) );
  total += decoded * (half) psWeights[1].x;

  decoded = DecodeRGBS( tex2D(_tex0, IN.tc2.xy) );
  total += decoded * (half) psWeights[2].x;

  decoded = DecodeRGBS( tex2D(_tex0, IN.tc3.xy) );
  total += decoded * (half) psWeights[3].x;

  decoded = DecodeRGBS( tex2D(_tex0, IN.tc4.xy) );
  total += decoded * (half) psWeights[4].x;

  decoded = DecodeRGBS( tex2D(_tex0, IN.tc5.xy) );
  total += decoded * (half) psWeights[5].x;

  decoded = DecodeRGBS( tex2D(_tex0, IN.tc6.xy) );
  total += decoded * (half) psWeights[6].x;

  decoded = DecodeRGBS( tex2D(_tex0, IN.tc7.xy) );
  total += decoded * (half) psWeights[7].x;

  OUT.Color = EncodeRGBS( float4( total.xyz, 1) );

  return OUT;
}

// Optimized gauss blur version, making use of bilinear filtering
// GaussBlurBilinearVS + GaussBlurBilinearPS; no render state is set in this pass.
technique GaussBlurBilinear
{
  pass p0
  {
    VertexShader = CompileVS GaussBlurBilinearVS();
    PixelShader = CompilePS GaussBlurBilinearPS();    
  }
}

// MaskedGaussBlurBilinearVS + MaskedGaussBlurBilinearPS (blur blended by the _tex1 mask).
technique MaskedGaussBlurBilinear
{
  pass p0
  {
    VertexShader = CompileVS MaskedGaussBlurBilinearVS();
    PixelShader = CompilePS MaskedGaussBlurBilinearPS();    
  }
}

// Blur on RGBS-encoded input: reuses MaskedGaussBlurBilinearVS with GaussBlurBilinearEncodedPS.
technique GaussBlurBilinearEncoded
{
  pass p0
  {
    VertexShader = CompileVS MaskedGaussBlurBilinearVS();
    PixelShader = CompilePS GaussBlurBilinearEncodedPS();    
  }
}

// ===================================================================================================
// Technique: GaussAlphaBlur
// Description: Applies a separable vertical/horizontal gaussian blur filter for alpha channel only
// ===================================================================================================
// FIX:: optimize
// Vertex-to-pixel interpolants for the alpha-only gauss blur.
// GaussAlphaBlurVS fills tcN.xy with the base texture coordinate plus
// PI_psOffsets[N].xy; tc0.zw additionally carries the unshifted base coordinate.
struct vtxOutAlphaBlur
{
  float4 HPosition : POSITION;
  // xy = tap 0 coordinate, zw = original (un-offset) coordinate
  float4 tc0 : TEXCOORDN;    
  // Taps 1..7
  float2 tc1 : TEXCOORDN;    
  float2 tc2 : TEXCOORDN;    
  float2 tc3 : TEXCOORDN;    
  float2 tc4 : TEXCOORDN;    
  float2 tc5 : TEXCOORDN;      
  float2 tc6 : TEXCOORDN;    
  float2 tc7 : TEXCOORDN;    
};

// Vertex shader for the alpha-only gauss blur.
// Transforms the position by vpMatrix and emits eight tap coordinates
// (base coordinate + PI_psOffsets[n].xy). The unshifted base coordinate is
// stored in tc0.zw.
vtxOutAlphaBlur GaussAlphaBlurVS(vtxIn IN)
{
  vtxOutAlphaBlur OUT = (vtxOutAlphaBlur) 0;

  float4 vWorldPos = IN.Position;
  OUT.HPosition = mul(vpMatrix, vWorldPos);

  float2 vCenterTC = IN.baseTC.xy;

  // Tap 0 shares its interpolant with the untouched coordinate
  OUT.tc0.xy = vCenterTC + PI_psOffsets[0].xy;
  OUT.tc0.zw = vCenterTC;

  // Remaining taps
  OUT.tc1.xy = vCenterTC + PI_psOffsets[1].xy;
  OUT.tc2.xy = vCenterTC + PI_psOffsets[2].xy;
  OUT.tc3.xy = vCenterTC + PI_psOffsets[3].xy;
  OUT.tc4.xy = vCenterTC + PI_psOffsets[4].xy;
  OUT.tc5.xy = vCenterTC + PI_psOffsets[5].xy;
  OUT.tc6.xy = vCenterTC + PI_psOffsets[6].xy;
  OUT.tc7.xy = vCenterTC + PI_psOffsets[7].xy;

  return OUT;
}

// Pixel shader for the alpha-only gauss blur.
// Output rgb is the source colour at the unshifted coordinate (tc0.zw);
// output alpha is the psWeights[n].x-weighted sum of the eight tap alphas.
pixout GaussAlphaBlurPS(vtxOutAlphaBlur IN)
{
  pixout OUT;

  // Alpha of every tap
  half a0 = tex2D(_tex0, IN.tc0.xy).a;
  half a1 = tex2D(_tex0, IN.tc1.xy).a;
  half a2 = tex2D(_tex0, IN.tc2.xy).a;
  half a3 = tex2D(_tex0, IN.tc3.xy).a;
  half a4 = tex2D(_tex0, IN.tc4.xy).a;
  half a5 = tex2D(_tex0, IN.tc5.xy).a;
  half a6 = tex2D(_tex0, IN.tc6.xy).a;
  half a7 = tex2D(_tex0, IN.tc7.xy).a;

  // Weighted accumulation, tap 0 through tap 7
  half alphaSum = 0;
  alphaSum += a0 * (half) psWeights[0].x;
  alphaSum += a1 * (half) psWeights[1].x;
  alphaSum += a2 * (half) psWeights[2].x;
  alphaSum += a3 * (half) psWeights[3].x;
  alphaSum += a4 * (half) psWeights[4].x;
  alphaSum += a5 * (half) psWeights[5].x;
  alphaSum += a6 * (half) psWeights[6].x;
  alphaSum += a7 * (half) psWeights[7].x;

  // Colour is left unblurred
  OUT.Color.xyz = tex2D(_tex0, IN.tc0.zw).xyz;
  OUT.Color.a = alphaSum;

  return OUT;
}

// Single pass: GaussAlphaBlurVS feeding GaussAlphaBlurPS (blurs alpha, passes colour through).
technique GaussAlphaBlur
{
  pass p0
  {
    VertexShader = CompileVS GaussAlphaBlurVS();
    PixelShader = CompilePS GaussAlphaBlurPS();    
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Kawase Blur technique //////////////////////////////////////////////////////////////////////////

/// Specific data ////////////////////////

///////////////// vertex shader //////////////////

// Vertex-to-pixel interpolants for the vertical anisotropic blur.
// AnisotropicVerticalVS writes eight coordinates that differ from the base
// texture coordinate only in y: baseTC0..3 are offset in +y, baseTC4..7 in -y.
struct vtxOutAnisotropicVertical
{
  float4 HPosition  : POSITION;
  float2 baseTC0 : TEXCOORDN;    
  float2 baseTC1 : TEXCOORDN;    
  float2 baseTC2 : TEXCOORDN;    
  float2 baseTC3 : TEXCOORDN;    
  float2 baseTC4 : TEXCOORDN;    
  float2 baseTC5 : TEXCOORDN;    
  float2 baseTC6 : TEXCOORDN;    
  float2 baseTC7 : TEXCOORDN;    
};

// Vertex shader for the vertical anisotropic blur.
// Produces eight y-shifted coordinates from blurParams0:
//   baseTC0..3 = base + blurParams0.{x,y,z,w} * 0.125 * 0.75
//   baseTC4..7 = base - blurParams0.{x,y,z,w} * 0.75
// NOTE(review): the +y taps carry an extra 0.125 factor that the -y taps do
// not; confirm this asymmetry is intended.
vtxOutAnisotropicVertical AnisotropicVerticalVS(vtxIn IN)
{
  vtxOutAnisotropicVertical OUT = (vtxOutAnisotropicVertical)0;

  float4 vWorldPos = IN.Position;
  OUT.HPosition = mul(vpMatrix, vWorldPos);

  float2 vBaseTC = IN.baseTC.xy;

  // Pre-scaled vertical distances
  float4 vUpDist   = blurParams0 * 0.125 * 0.75f;
  float4 vDownDist = blurParams0 * 0.75f;

  OUT.baseTC0.xy = vBaseTC + float2(0, vUpDist.x);
  OUT.baseTC1.xy = vBaseTC + float2(0, vUpDist.y);
  OUT.baseTC2.xy = vBaseTC + float2(0, vUpDist.z);
  OUT.baseTC3.xy = vBaseTC + float2(0, vUpDist.w);

  OUT.baseTC4.xy = vBaseTC - float2(0, vDownDist.x);
  OUT.baseTC5.xy = vBaseTC - float2(0, vDownDist.y);
  OUT.baseTC6.xy = vBaseTC - float2(0, vDownDist.z);
  OUT.baseTC7.xy = vBaseTC - float2(0, vDownDist.w);

  return OUT;
}

///////////////// pixel shader //////////////////
// Box-averages eight samples of blurMap0 taken at the coordinates produced by
// AnisotropicVerticalVS (equal weight, sum divided by 8).
pixout AnisotropicVerticalBlurPS(vtxOutAnisotropicVertical IN)
{
  pixout OUT;

  float4 cTotal = tex2D(blurMap0, IN.baseTC0.xy)
                + tex2D(blurMap0, IN.baseTC1.xy)
                + tex2D(blurMap0, IN.baseTC2.xy)
                + tex2D(blurMap0, IN.baseTC3.xy)
                + tex2D(blurMap0, IN.baseTC4.xy)
                + tex2D(blurMap0, IN.baseTC5.xy)
                + tex2D(blurMap0, IN.baseTC6.xy)
                + tex2D(blurMap0, IN.baseTC7.xy);

  OUT.Color = cTotal / 8.0;

  return OUT;
}

////////////////// technique /////////////////////
// Single pass: AnisotropicVerticalVS feeding AnisotropicVerticalBlurPS, back-face culling enabled.
technique AnisotropicVertical
{
  pass p0
  {
    VertexShader = CompileVS AnisotropicVerticalVS();
    PixelShader = CompilePS AnisotropicVerticalBlurPS();
    
    CullMode = Back;        
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Dilate technique for sprites ///////////////////////////////////////////////////////////////////

/// Specific data ////////////////////////

float4 vPixelOffset;			// PS xy = 1/width,1/height (scales the dilate kernel offsets); zw = UV offset DilatePS adds to reach the "sky contribution" texel
float4 vDilateParams;			// PS x = brightness_multiplier applied to the final colour; yzw are not read by DilatePS

/// Constants ///////////////////////////

////////////////// samplers /////////////////////

///////////////// vertex shader //////////////////

// Vertex input for the dilate pass. Members come from the IN_* macros
// (defined outside this file section); DilateVS reads IN.Position and IN.baseTC.
struct vtxInDilate
{
  IN_P
  IN_TBASE
  IN_C0
};

// Vertex-to-pixel interpolants for the dilate pass: clip-space position and
// the (slightly biased) base texture coordinate.
struct vtxOutDilate
{
  float4 HPosition  : POSITION;
  float2 baseTC : TEXCOORD0;    
};

// Vertex shader for the dilate pass: transforms the position by vpMatrix and
// forwards the texture coordinate with a tiny positive bias.
vtxOutDilate DilateVS(vtxInDilate IN)
{
  vtxOutDilate OUT = (vtxOutDilate)0;

  float4 vWorldPos = IN.Position;
  OUT.HPosition = mul(vpMatrix, vWorldPos);

  // The small bias moves the lookup more towards the middle of the texel -
  // fixes white spots on DX10
  OUT.baseTC.xy = IN.baseTC.xy + 0.00001f;

  return OUT;
}


///////////////// pixel shader //////////////////
// Dilate pixel shader for sprites.
// Starting from the centre texel, scans a fixed neighbourhood of _tex0 and
// adopts the colour of a neighbour whose alpha is > 0 (the last matching
// kernel entry wins). The "sky contribution" is then read at that texel's
// coordinate + vPixelOffset.zw and added. Output rgb is (sun + sky) scaled by
// vDilateParams.x; output alpha encodes the sun/(sun+sky) ratio for texels
// whose original alpha was > 0, and is 0 otherwise.
pixout DilatePS(vtxOutDilate IN)
{
  pixout OUT;

	// Offsets in texels: the first 8 entries are the immediate 3x3 neighbours,
	// the following 12 lie two texels away along one axis.
	const half2 Kernel_Neighbors[8+12] = 
	{
		-1.0f,0.0f,
		1.0f,0.0f,
		0.0f,-1.0f,
		0.0f,1.0f,

		-1.0f,-1.0f,
		-1.0f,1.0f,
		1.0f,-1.0f,
		1.0f,1.0f,

		-2.0f,0.0f,
		2.0f,0.0f,
		0.0f,-2.0f,
		0.0f,2.0f,

		-2.0f,1.0f,
		2.0f,1.0f,
		1.0f,-2.0f,
		1.0f,2.0f,

		-2.0f,-1.0f,
		2.0f,-1.0f,
		-1.0f,-2.0f,
		-1.0f,2.0f,
	};
	



	float4 cBase0 = tex2D(_tex0, IN.baseTC.xy);		                  // sun contribution
	// NOTE(review): cBase1 is never read below (only in commented-out code).
	float4 cBase1 = tex2D(_tex0, IN.baseTC.xy + vPixelOffset.zw);		// sky contribution
	
	// Overwritten further down by cColor0+cColor1.
	OUT.Color = cBase0;

	half4 cColor0 = cBase0;		// sun contribution

	// Coordinate of the texel currently supplying cColor0 (starts at the centre).
	float2 vBestOffset = IN.baseTC.xy;
	//half2 vBestOffset = half2(0,0);

// NOTE(review): this attribute precedes a variable declaration, not the for
// loop below, so it does not annotate the loop as written - confirm intent.
#ifdef D3D10
  [unroll]
#endif

	// 8 taps at low quality, all 20 otherwise.
	int iSampleCount=8;

  if( GetShaderQuality() > QUALITY_LOW )
  	iSampleCount=8+12;

	for(int i=0;i<iSampleCount;i++)	
	{
		float2 vLocalOffset = IN.baseTC.xy+Kernel_Neighbors[i].xy*vPixelOffset.xy;
		half4 cVal0 = tex2D(_tex0, vLocalOffset);		// sun contribution
		
		// Any neighbour with non-zero alpha replaces the current pick.
		if (cVal0.a > 0.0f)
		{
			cColor0 = cVal0;
			vBestOffset = vLocalOffset;
		}
	}
	
	half4 cColor1 = tex2D(_tex0, vBestOffset + vPixelOffset.zw);		// sky contribution

	OUT.Color = cColor0+cColor1;

	// NOTE(review): the denominator is 0 when sun+sky rgb is all zero; the
	// result is only consumed when cBase0.a > 0 (see alpha assignment below).
	half fContribution = max(cColor0.r,max(cColor0.g,cColor0.b)) / max(OUT.Color.r,max(OUT.Color.g,OUT.Color.b));		// Sun/(Sun+Sky)
	
	OUT.Color *= vDilateParams.x;		// adjust HDR values to LDR range

  const half SpriteAlphaRef=0.1;

	// Alpha = 1 - ratio*(1 - SpriteAlphaRef) for originally covered texels, else 0.
	OUT.Color.a = (cBase0.a>0.0f) ? 1.0f-fContribution*(1.0-SpriteAlphaRef) : 0;
	//OUT.Color.a = cBase0.a;
	//OUT.Color = cBase1; //.a*0.3; // * 0.4;
//  OUT.Color.a = 1;
  return OUT;
}

////////////////// technique /////////////////////

// Single pass: DilateVS feeding DilatePS, culling disabled.
technique Dilate
{
  pass p0
  {
    VertexShader = CompileVS DilateVS();            
    PixelShader = CompilePS DilatePS();
    CullMode = None;        
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Color correction technique /////////////////////////////////////////////////////////////////////

/// Constants ////////////////////////////

// Colour transform used by ColorCorrectionPS: rows 0..2 are each dotted with
// (r, g, b, 1) to produce the output r, g and b; row 3 is not read there.
float4x4 mColorMatrix;

///////////////// pixel shader //////////////////

// Applies mColorMatrix to the scene colour.
// The sampled rgb is extended to (r, g, b, 1) and dotted with matrix rows
// 0..2 to obtain the corrected rgb; output alpha is 1.
pixout ColorCorrectionPS(vtxOut IN)
{
  pixout OUT;

  // Homogeneous input colour (w = 1 lets the matrix carry a constant offset)
  half4 cSource = half4(tex2D(_tex0, IN.baseTC.xy).xyz, 1);

  // Colour transformation matrix adjusts saturation/brightness/contrast
  half4 cCorrected = cSource;
  cCorrected.xyz = float3( dot(cSource.xyzw, mColorMatrix[0].xyzw),
                           dot(cSource.xyzw, mColorMatrix[1].xyzw),
                           dot(cSource.xyzw, mColorMatrix[2].xyzw) );

  OUT.Color = cCorrected;

  return OUT;
}

////////////////// technique /////////////////////

// Single pass: BaseVS feeding ColorCorrectionPS, culling disabled.
technique ColorCorrection
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS ColorCorrectionPS();    
    CullMode = None;        
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Image blurring techniques //////////////////////////////////////////////////////////////////////

///////////////// pixel shader //////////////////

// Blends a blurred image (_tex1) with the sharp image (_tex0).
// psParams[0].w is the lerp factor: 0 yields the blurred image, 1 the sharp one.
pixout BlurInterpolationPS(vtxOut IN)
{
  pixout OUT;

  float2 vTC = IN.baseTC.xy;

  half4 cSharp = tex2D( _tex0, vTC );
  half4 cBlur = tex2D( _tex1, vTC );

  OUT.Color = lerp(cBlur, cSharp, psParams[0].w);

  return OUT;
}

////////////////// technique /////////////////////

// Single pass: BaseVS feeding BlurInterpolationPS, culling disabled.
technique BlurInterpolation
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS BlurInterpolationPS();    
    CullMode = None;        
  }
}


////////////////////////////////////////////////////////////////////////////////////////////////////
/// Masked Image blurring techniques //////////////////////////////////////////////////////////////////////

///////////////// pixel shader //////////////////

// Blends a blurred image (_tex1) with the sharp image (_tex0), with the lerp
// factor psParams[0].w additionally scaled by sqrt of the mask read from _tex2.
pixout MaskedBlurInterpolationPS(vtxOut IN)
{
  pixout OUT;

  half4 cSharp = tex2D( _tex0, IN.baseTC.xy );
  half4 cBlur = tex2D( _tex1, IN.baseTC.xy );

  // NOTE(review): the mask is addressed with the swapped swizzle .wz (not .zw
  // or .xy) - confirm against what BaseVS stores in baseTC.zw.
  half fMask = tex2D( _tex2, IN.baseTC.wz ).x;
  fMask = sqrt( fMask );

  half fSharpAmount = fMask * psParams[0].w;
  OUT.Color = lerp(cBlur, cSharp, fSharpAmount);

  return OUT;
}

////////////////// technique /////////////////////

// Single pass: BaseVS feeding MaskedBlurInterpolationPS, culling disabled.
technique MaskedBlurInterpolation
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS MaskedBlurInterpolationPS();    
    CullMode = None;        
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Radial blurring technique //////////////////////////////////////////////////////////////////////

/// Constants ////////////////////////////

// xy = radial center screen space position, z = radius attenuation (inverse radius), w = blur strength
float4 vRadialBlurParams;

///////////////// pixel shader //////////////////

// Radial blur around vRadialBlurParams.xy.
// Averages nSamples taps of _tex0 taken along the vector from the current
// texel towards the radial centre. The step length is the blur strength
// (vRadialBlurParams.w) attenuated by the squared falloff
// saturate(1 - |vec * invRadius|^2), so the blur is strongest near the centre
// and vanishes outside the radius.
pixout RadialBlurringPS(vtxOut IN)
{
  pixout OUT;

  float2 vScreenPos = vRadialBlurParams.xy;

  // Vector from this texel to the radial centre
  float2 vBlurVec = ( vScreenPos.xy - IN.baseTC.xy);

  float fInvRadius = vRadialBlurParams.z;
  float blurDist = saturate( 1- dot( vBlurVec.xy * fInvRadius, vBlurVec.xy * fInvRadius)) ;

  // Keep the attenuated strength in a local: the uniform itself must not be
  // written (global shader variables are implicitly constant outside
  // compatibility mode).
  float fBlurStrength = vRadialBlurParams.w * (blurDist*blurDist);

  const int nSamples = 8;
  const float fWeight = 1.0 / (float) nSamples;

  half4 cAccum = 0;
  for(int i=0; i < nSamples; i++)
  {
    half4 cCurr = tex2D(_tex0, (IN.baseTC.xy + vBlurVec.xy * i * fBlurStrength) );
    cAccum += cCurr;
  }

  // Equal-weight average
  OUT.Color = cAccum * fWeight;

  return OUT;
}
////////////////// technique /////////////////////

// Single pass: BaseVS feeding RadialBlurringPS, culling disabled.
technique RadialBlurring
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS RadialBlurringPS();    
    CullMode = None;        
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Motion Blur technique //////////////////////////////////////////////////////////////////////////

/// Specific data ////////////////////////

// Vertex-to-pixel interpolants for the camera motion blur displacement pass.
// All three TEXCOORD members are filled by MotionBlurDisplVS via HPosToScreenTC
// and are divided by their own .w (or sampled projectively) in the pixel shaders.
struct vtxOutMotionBlurDispl
{
  float4 HPosition  : POSITION;
  // Projective screen coordinate of the current-frame position
  float4 tcProj     : TEXCOORDN;
  // Screen coordinate of the vertex under the current view-projection
  float4 vVelocity  : TEXCOORDN;
  // Screen coordinate of the same vertex under mViewProjPrev
  float4 vVelocityPrev  : TEXCOORDN;
};

/// Constants ////////////////////////////

float4x4 mViewProj : PI_Composite;  // ( view projection matrix )
float4x4 mViewProjI : PB_UnProjMatrix;  // invert( view projection matrix )
// Previous-frame view projection; MotionBlurDisplVS uses it to reproject the vertex.
float4x4 mViewProjPrev;

// Per-pass parameters; the meaning of the components depends on the shader reading them:
//   object motion blur PS      : x  = velocity scale
//   camera motion blur PS      : w  = velocity scale
//   GetResampledOrigDepth      : xy = texel offset
//   OMB_VelocityIDRescalePS    : zw = texel offset
float4 PI_motionBlurParams;

// w = rotation amount (multiplied by 5 and added to the depth mask in the mask-gen shaders)
float4 motionBlurParams;
float4 motionBlurChromaParams;
float4 motionBlurCamParams;
// xy = extra screen-space blur direction added on top of the camera velocity
float4 vDirectionalBlur;

/// Samplers ////////////////////////////

sampler2D motionBlurMaskMap : register(s1);

///////////////// vertex shaders //////////////////

// Vertex shader for the camera motion blur displacement pass.
// The incoming vertex is pushed onto a radius-25 sphere centred on the camera
// (g_VS_WorldViewPos), projected with the current and the previous
// view-projection matrix, and both results are converted to screen
// coordinates for the pixel shader.
vtxOutMotionBlurDispl MotionBlurDisplVS(vtxIn IN)
{
  vtxOutMotionBlurDispl OUT = (vtxOutMotionBlurDispl)0;

  // Sphere around the viewer; the radius (25) would need to be tweakable to
  // control the blur strength
  float4 vSpherePos = IN.Position;
  vSpherePos.xyz = normalize(vSpherePos.xyz) * 25;
  vSpherePos.xyz += g_VS_WorldViewPos.xyz;

  // Current and previous frame clip-space positions
  float4 vCurrClip = mul(vpMatrix, vSpherePos);
  float4 vPrevClip = mul(mViewProjPrev, vSpherePos);

  OUT.HPosition = vCurrClip;

  OUT.vVelocity = HPosToScreenTC( vCurrClip );
  OUT.vVelocityPrev = HPosToScreenTC( vPrevClip );

  OUT.tcProj = HPosToScreenTC( OUT.HPosition );

  return OUT;
}

///////////////// pixel shaders //////////////////

// Generates the camera motion blur depth mask (LDR path).
// rgb: scene colour from screenMapSampler, passed through.
// a  : mask = saturate(d - 1) * saturate(exp(-d)^2 + rotation), where
//      d = depth * PS_NearFarClipDist.y and rotation = motionBlurParams.w * 5.
//      The first factor zeroes the mask for d <= 1 (nearest geometry).
pixout MotionBlurdDepthMaskPS(vtxOut IN)
{
  pixout OUT = (pixout)0;

  float fDepth = GetDepthMap(depthMapSampler, IN.baseTC);

  // exp falloff with distance, squared
  half mask_accum = exp(-fDepth * PS_NearFarClipDist.y);
  mask_accum *= mask_accum;

  float fRotationAmount = (motionBlurParams.w * 5.0);

  half fNearestMask = ( fDepth * PS_NearFarClipDist.y );
  fNearestMask = saturate( fNearestMask - 1.0 )*saturate(mask_accum + fRotationAmount);

  // Explicit swizzles: the sampler takes a 2D coordinate and only rgb is stored
  OUT.Color.xyz = tex2D(screenMapSampler, IN.baseTC.xy).xyz;
  OUT.Color.w = fNearestMask; // store mask in screen map alpha

  return OUT;
}

// Generates the camera motion blur depth mask (HDR path).
// Colour is read from _tex0 and passed through; alpha is replaced by
//   saturate(d - 1) * saturate(exp(-d)^2 + motionBlurParams.w * 5)
// with d = depth(_tex1) * PS_NearFarClipDist.y.
pixout MotionBlurDepthMaskHDRPS(vtxOut IN)
{
  pixout OUT = (pixout)0;

  OUT.Color = tex2D(_tex0, IN.baseTC);

  float fDepth = GetDepthMap(_tex1, IN.baseTC);
  float fScaledDepth = fDepth * PS_NearFarClipDist.y;

  // Distance falloff, squared
  half fFalloff = exp(-fScaledDepth);
  fFalloff *= fFalloff;

  float fRotation = (motionBlurParams.w * 5.0);

  // Zero for d <= 1 (nearest geometry), then modulated by falloff + rotation
  half fMask = fScaledDepth;
  fMask = saturate( fMask - 1.0 ) * saturate(fFalloff + fRotation);

  // Mask lives in the alpha channel
  OUT.Color.w = fMask;

  return OUT;
}

// Fetches the 2D velocity stored in the xy channels of sVelocity at tc
// (mip level 0). The value is returned exactly as stored - no range
// expansion or length decode is applied.
float2 GetVelocity( sampler2D sVelocity, float2 tc )
{
  float4 cVelocity = tex2Dlod(sVelocity, float4(tc.xy, 0, 0));

  return cVelocity.xy;
}

// Object motion blur (unmasked version).
// _tex0 = scene colour, _tex1 = velocity, _tex2 = depth (usage as read below).
// For each of 8 poisson-disc neighbours that is not behind the centre texel,
// the neighbour's velocity is fetched; when it is non-zero, 8 colour samples
// are taken along that velocity (centred on this texel) and accumulated
// together with the velocity magnitude found at each sample. The averaged
// colour is blended over the original using the averaged magnitude.
pixout MotionBlurObjectPS(vtxOut IN)
{
  pixout OUT = (pixout)0;

  float4 OriginalUV = IN.baseTC;

  float2 poisson[8] = {
    float2( 0.0,      0.0),
    float2( 0.527837,-0.085868),
    float2(-0.040088, 0.536087),
    float2(-0.670445,-0.179949),
    float2(-0.419418,-0.616039),
    float2( 0.440453,-0.639399),
    float2(-0.757088, 0.349334),
    float2( 0.574619, 0.685879)
  };

  float4 cOrig = tex2Dlod(_tex0, float4(IN.baseTC.xy, 0, 0));
  float4 cDummyFetchDx10 = tex2Dlod(_tex1, float4(IN.baseTC.xy, 0, 0)); // dummy fetch for dx10 samplers order declaration workaround
  float fOrigDepth = tex2Dlod(_tex2, float4(IN.baseTC.xy, 0, 0)).x;

  float4 Blurred = 0;   // xyz = colour sum, w = velocity magnitude sum
  float2 pixelVelocity;

  int NumberOfPostProcessSamples = 8;
  int nSamples = 8;

  int s = 0;            // number of accumulated colour samples

#if D3D10
  [unroll]
#endif
  for(int n= 0; n<nSamples; n++)
  {
    // todo: this must scale depending on camera distance or object size on screen
    float2 vOffset = poisson[n]* 0.0333 * saturate((1-fOrigDepth)*(1-fOrigDepth) );
    float2 vNeighbourTC = OriginalUV.xy + vOffset;

    // Ignore neighbours that lie behind the centre texel
    float  fCurrDepth = tex2Dlod(_tex2, float4(vNeighbourTC, 0, 0)).x;
    if ( fCurrDepth > fOrigDepth )
      continue;

    // Sample neighbour pixel velocity
    pixelVelocity.xy = GetVelocity(_tex1, vNeighbourTC);

    half fLen = dot(pixelVelocity.xy,pixelVelocity.xy);
    if( fLen )
    {
#if D3D10
  [unroll]
#endif
      for(float i = 0; i < NumberOfPostProcessSamples; i++)
      {
        // Step along the velocity, spanning [-0.5, 0.5) of its length around this texel
        float2 lookup = pixelVelocity * ((i / NumberOfPostProcessSamples)-0.5) * PI_motionBlurParams.x + OriginalUV.xy;

        // Lookup color/velocity at this new spot
        float4 Current = tex2Dlod(_tex0, float4(lookup.xy, 0, 0));
        float4 curVelocity = tex2Dlod(_tex1, float4(lookup.xy, 0, 0));
        half fBlend = ( length(curVelocity));

        Blurred.xyz += Current.xyz;
        Blurred.w  += fBlend;
        s++;
      }
    }
  }

  OUT.Color = float4( cOrig.xyz, 1);
  if( s )
  {
    // Blend the average colour of all samples over the original
    float fLerp = Blurred.w/(float)s;
    OUT.Color.xyz = lerp(cOrig.xyz, Blurred.xyz/(float)s, saturate(fLerp*3));
  }

  return OUT;
}

// Builds the object motion blur mask from the velocity buffer (_tex0).
// Output channels:
//   x = 1 when the texel itself has non-zero velocity ("inside" case)
//   y = 1 when the texel has no velocity but at least one of 7 poisson
//       neighbours does ("border" case)
//   w = 1 when the relevant squared velocity (own, or neighbour average)
//       exceeds fMinVelocityThreshold - used as the second pass mask
pixout MotionBlurObjectMaskPS(vtxOut IN)
{
  pixout OUT = (pixout)0;

  float4 OriginalUV = IN.baseTC;

  float2 poisson[7] = {
    float2( 0.527837,-0.085868),
    float2(-0.040088, 0.536087),
    float2(-0.670445,-0.179949),
    float2(-0.419418,-0.616039),
    float2( 0.440453,-0.639399),
    float2(-0.757088, 0.349334),
    float2( 0.574619, 0.685879)
  };

  half2 cOrigVelocity = tex2Dlod(_tex0, float4(IN.baseTC.xy, 0, 0)).xy;
  // Result is unused; fetch kept as in the original (presumably for sampler
  // binding order, like the dummy fetches in the sibling shaders - confirm).
  float fOrigDepth = tex2Dlod(_tex1, float4(IN.baseTC.xy, 0, 0)).x;

  float2 pixelVelocity;

  int nSamples = 7;

  const half fOffsetRange = 100.0;

  // Scale applied to the poisson offsets (0.0333 was the old hardcoded scale).
  // Computed into a local instead of modifying the PS_ScreenSize uniform.
  const half2 vOffsetScale = PS_ScreenSize.zw * fOffsetRange;

  const half fMinVelocityThreshold = 0.0001;

  half fCenterVelocity = dot( cOrigVelocity.xy, cOrigVelocity.xy);

  if( fCenterVelocity ) // Inside case
  {
    OUT.Color.x = 1;
    OUT.Color.w = fCenterVelocity > fMinVelocityThreshold; // set second pass mask
    return OUT;
  }

  // Borders case: accumulate squared velocity of the neighbours
#if D3D10
  [unroll]
#endif
  for(int n= 0; n<nSamples; n++)
  {
    // todo: this must scale depending on camera distance or object size on screen
    float2 vOffset = poisson[n]* vOffsetScale;

    // Sample neighbour pixel velocity
    pixelVelocity.xy = tex2Dlod(_tex0, float4(OriginalUV.xy + vOffset, 0, 0)).xy;

    half fLen = dot(pixelVelocity.xy,pixelVelocity.xy);
    OUT.Color.y += fLen;
  }

  OUT.Color.y = OUT.Color.y / (float) nSamples;
  OUT.Color.w = OUT.Color.y > fMinVelocityThreshold;
  OUT.Color.y = (OUT.Color.y > 0.0); // set second pass mask

  return OUT;
}

// Object motion blur driven by a precomputed motion mask (_tex3).
// _tex0 = scene colour, _tex1 = velocity, _tex2 = depth (usage as read below).
//   mask.xy == 0 : texel untouched, original colour returned
//   mask.x  != 0 : texel is inside a moving mesh - 8 samples along its own velocity
//   otherwise    : texel is on a mesh edge - same neighbour search as the
//                  unmasked shader, using 7 poisson taps
pixout MotionBlurObjectUsingMaskPS(vtxOut IN)
{
  // preliminary object motion blur optimization using motion mask

  pixout OUT = (pixout)0;

  float4 OriginalUV = IN.baseTC;

  float2 poisson[7] = {
    float2( 0.527837,-0.085868),
    float2(-0.040088, 0.536087),
    float2(-0.670445,-0.179949),
    float2(-0.419418,-0.616039),
    float2( 0.440453,-0.639399),
    float2(-0.757088, 0.349334),
    float2( 0.574619, 0.685879)
  };

  float4 cOrig = tex2Dlod(_tex0, float4(IN.baseTC.xy, 0, 0));
  float4 cDummyFetchDx10 = tex2Dlod(_tex1, float4(IN.baseTC.xy, 0, 0)); // dummy fetch for dx10 samplers order declaration workaround
  float fOrigDepth = tex2Dlod(_tex2, float4(IN.baseTC.xy, 0, 0)).x;
  half4 cMask = tex2Dlod(_tex3, float4(IN.baseTC.xy, 0, 0)).xyzw;

  OUT.Color = cOrig;

  // Nothing moving here or nearby
  if( dot( cMask.xy, 1) == 0.0 )
    return OUT;

  float4 Blurred = 0;   // xyz = colour sum, w = squared velocity sum
  float2 pixelVelocity;

  int nSamples = 8;

  const int nSamplesEdges = 7;
  const float nRecipSamples = 1.0 / (float)nSamples;

  float s= 0;           // number of accumulated edge samples

  if( cMask.x ) // sample is inside mesh - do regular motion blurring
  {
    // get velocity
    pixelVelocity.xy = GetVelocity(_tex1, OriginalUV.xy) * PI_motionBlurParams.x;
  #if D3D10
    [unroll]
  #endif
    for(float i = 0; i < nSamples; i++)
    {
      float2 lookup = pixelVelocity * ((i * nRecipSamples)-0.5) + OriginalUV.xy;
      Blurred.xyz += tex2Dlod(_tex0, float4(lookup.xy, 0, 0)).xyz;
    }

    OUT.Color = half4(Blurred.xyz * nRecipSamples, 1);
    return OUT;
  }
  else // samples are in mesh edges
  {

  #if D3D10
    [unroll]
  #endif
    for(int n= 0; n<nSamplesEdges; n++)
    {
      // todo: this must scale depending on camera distance or object size on screen
      float2 vOffset = poisson[n]* 0.0333 * saturate((1-fOrigDepth)*(1-fOrigDepth) );
      float2 vNeighbourTC = OriginalUV.xy + vOffset;

      // Ignore neighbours that lie behind the centre texel
      float  fCurrDepth = tex2Dlod(_tex2, float4(vNeighbourTC, 0, 0)).x;
      if ( fCurrDepth > fOrigDepth )
        continue;

      // Sample neighbour pixel velocity
      pixelVelocity.xy = GetVelocity(_tex1, vNeighbourTC);

      half fLen = dot(pixelVelocity.xy,pixelVelocity.xy);
      if( fLen )
      {
  #if D3D10
    [unroll]
  #endif
        for(float i = 0; i < nSamples; i++)
        {
          float2 lookup = pixelVelocity * ((i * nRecipSamples)-0.5)* PI_motionBlurParams.x + OriginalUV.xy;

          // Lookup color/velocity at this new spot
          float3 Current = tex2Dlod(_tex0, float4(lookup.xy, 0, 0)).xyz;
          float4 curVelocity = tex2Dlod(_tex1, float4(lookup.xy, 0, 0));
          half fBlend = ( dot(curVelocity, curVelocity));

          Blurred += half4(Current.xyz, fBlend);
        }

        s+= nSamples;
      }
    }

    if( s )
    {
      // Blend the average colour of all samples over the original
      half fLerp = Blurred.w/s;
      OUT.Color.xyz = lerp(cOrig.xyz, Blurred.xyz/s, saturate(fLerp*3));
    }
  }

  return OUT;
}

////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////

// Rescale pass for the velocity/ID buffer (_tex0).
// Reads the centre texel and its four diagonal neighbours (one texel away,
// texel size = PI_motionBlurParams.zw) and outputs, whole, the sample with
// the largest z component.
pixout OMB_VelocityIDRescalePS(vtxOut IN)
{
  pixout OUT = (pixout)0;

  float2 vTexel = PI_motionBlurParams.zw;
  float2 vCenterTC = IN.baseTC.xy;

  float4 cBest    = tex2D(_tex0, vCenterTC);
  float4 cDiagPP  = tex2D(_tex0, vCenterTC + float2(1,1) * vTexel);
  float4 cDiagNN  = tex2D(_tex0, vCenterTC - float2(1,1) * vTexel);
  float4 cDiagNP  = tex2D(_tex0, vCenterTC + float2(-1,1)* vTexel);
  float4 cDiagPN  = tex2D(_tex0, vCenterTC + float2(1,-1)* vTexel);

  // Keep the candidate with maximum depth (z); on ties the later sample is taken
  if( !(cBest.z > cDiagPP.z) )
    cBest = cDiagPP;
  if( !(cBest.z > cDiagNN.z) )
    cBest = cDiagNN;
  if( !(cBest.z > cDiagNP.z) )
    cBest = cDiagNP;
  if( !(cBest.z > cDiagPN.z) )
    cBest = cDiagPN;

  OUT.Color = cBest;

  return OUT;
}

////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////

// Returns the maximum of the _tex1 x channel over the texel at tc and its
// four diagonal neighbours (offset by PI_motionBlurParams.xy), all at mip 0.
float GetResampledOrigDepth( float2 tc )
{
  float2 vStep = PI_motionBlurParams.xy;

  float fCenter = tex2Dlod(_tex1, float4(tc.xy, 0, 0)).x;
  float fDiagPP = tex2Dlod(_tex1, float4(tc.xy + float2(1,1) * vStep, 0, 0)).x;
  float fDiagNN = tex2Dlod(_tex1, float4(tc.xy - float2(1,1) * vStep, 0, 0)).x;
  float fDiagNP = tex2Dlod(_tex1, float4(tc.xy + float2(-1,1)* vStep, 0, 0)).x;
  float fDiagPN = tex2Dlod(_tex1, float4(tc.xy + float2(1,-1)* vStep, 0, 0)).x;

  return max( max( max( max(fCenter, fDiagPP), fDiagNN ), fDiagNP ), fDiagPN );
}

////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////

// Offset map pass: currently a straight copy of _tex0.
// The previous body also derived a velocity length and a depth-based size
// scale (1 - saturate(z * PS_NearFarClipDist.y / 200))^8, but neither value
// reached the output, so those dead computations were dropped.
pixout OMB_OffsetMapPS(vtxOut IN)
{
  pixout OUT = (pixout)0;

  OUT.Color = tex2D(_tex0, IN.baseTC.xy);

  return OUT;
}

////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////

// Copies _tex0 to the output unchanged (all four channels).
pixout OMB_CopyAlphaIDPS(vtxOut IN)
{
  pixout OUT = (pixout)0;

  float4 cSource = tex2D(_tex0, IN.baseTC);
  OUT.Color = cSource;

  return OUT;
}

////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////

// One-dimensional dilation of the velocity/depth/ID buffer (_tex0).
// Texels that already carry velocity - or whose _tex2.x is 0 - are copied
// through. Otherwise up to 8 neighbours (1..4 steps on either side, along y
// when %_RT_SAMPLE0 is set, along x otherwise) are tested in kernel order and
// the first one that has velocity and a z smaller than the resampled centre
// depth is written out. If none qualifies the output is 0.
pixout OMB_VelocityDilationPS(vtxOut IN)
{
  pixout OUT = (pixout)0;

  const int nOffsets = 8;

  float2 vOffsets[ nOffsets ] =
  {
    float2(-1.0f, 0.0f),
    float2( 1.0f, 0.0f),

    float2(-2.0f, 0.0f),
    float2( 2.0f, 0.0f),

    float2(-3.0f, 0.0f),
    float2( 3.0f, 0.0f),

    float2(-4.0f, 0.0f),
    float2( 4.0f, 0.0f),
  };

  float2 vTexelStep = PS_ScreenSize.zw * 2.0;

  float4 vCenterVelocity = tex2Dlod(_tex0, float4(IN.baseTC.xy, 0, 0));
  float fCenterDepth = GetResampledOrigDepth(IN.baseTC.xy );
  float fOffsetScale = tex2Dlod(_tex2, float4(IN.baseTC.xy, 0, 0)).x;

  // Inside a moving object (or dilation disabled for this texel): pass through
  if( fOffsetScale == 0 || dot(vCenterVelocity.xy, vCenterVelocity.xy) )
  {
    OUT.Color = float4(vCenterVelocity.xyzw);
    return OUT;
  }

  // Edge search
  float4 vPicked = 0;

#if D3D10
  [unroll]
#endif
  for(int n = 0; n < nOffsets; n++ )
  {
    // Swizzle selects the dilation axis
    #if %_RT_SAMPLE0
    float2 vDir = vOffsets[n].yx;
    #else
    float2 vDir = vOffsets[n].xy;
    #endif

    float4 vCandidate = tex2Dlod(_tex0, float4(IN.baseTC.xy + vDir *vTexelStep, 0, 0));

    // Non-zero only if: candidate is nearer than the centre, it has velocity,
    // and nothing (with non-zero z) has been picked yet
    float fAccept = saturate( fCenterDepth - vCandidate.z );
    fAccept *= dot( vCandidate.xy, vCandidate.xy );
    fAccept *= vPicked.z == 0;

    if(fAccept)
    {
      vPicked = vCandidate;
    }
  }

  OUT.Color = float4(vPicked);
  return OUT;
}

////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////

// Object motion blur using the dilated velocity buffer.
// _tex0 = scene colour (alpha carries a blend mask), _tex1 = dilated velocity.
// Texels without velocity are copied through. Otherwise 16 colour samples are
// taken along the velocity direction - its length clamped to 48 / PS_ScreenSize
// and scaled by PI_motionBlurParams.x - and blended over the original
// according to the accumulated alpha. Output alpha is the averaged accumulated
// alpha so a following pass can reuse it (%_RT_SAMPLE1).
pixout OMB_UsingVelocityDilationPS(vtxOut IN)
{
  pixout OUT = (pixout)0;

  float4 OriginalUV = IN.baseTC;
  float4 cOrig = tex2Dlod(_tex0, float4(IN.baseTC.xy, 0, 0));
  float4 cOrigVelocity = tex2Dlod(_tex1, float4(IN.baseTC.xy, 0, 0));

  OUT.Color = cOrig;

  // Static texel: nothing to blur
  if( dot(cOrigVelocity.xy, cOrigVelocity.xy) == 0.0)
    return OUT;

  const int nSamples = 16;
  const float nRecipSamples = 1.0 / (float)nSamples;
  const float nRecipSamplesAcc = 1.0 / (float)(nSamples-1);

  // Direction = normalized velocity
  float2 vBlurStep = cOrigVelocity.xy;
  float fSpeed = length(vBlurStep.xy);
  if( fSpeed )
    vBlurStep.xy /= fSpeed;

  // Clamp blur length per axis, then apply the global scale
  float2 vScrSizeRecip = 1.0 / PS_ScreenSize.xy;
  const float2 vMaxRange = 48 * vScrSizeRecip.xy;
  vBlurStep.xy *= min(fSpeed, vMaxRange)* PI_motionBlurParams.x;

  float4 cSum = 0;      // xyz = colour sum, w = mask sum
#if D3D10
  [unroll]
#endif
  for(float i = 0; i < nSamples; i++)
  {
    // Samples span [-0.5, 0.5] of the blur vector around this texel
    float2 vSampleTC = vBlurStep * ((i * nRecipSamplesAcc)-0.5) + OriginalUV.xy;

    float4 cSample = tex2Dlod(_tex0, float4(vSampleTC.xy, 0, 0));
    cSum.xyz += cSample.xyz;

#if !%_RT_SAMPLE1
      // first pass: any non-zero alpha counts as fully masked
      cSum.w += saturate(100000 * cSample.w);
#else
      // reusing previous pass blending results
      cSum.w += cSample.w;
#endif
  }

  // Blend results with scene
  if( cSum.w )
  {
    cSum.xyz *= nRecipSamples;
#if !%_RT_SAMPLE1
    OUT.Color = lerp(cOrig, cSum,saturate( saturate( cSum.w*nRecipSamples)*2+ saturate(cOrig.w*1000)));
#else
    OUT.Color = lerp(cOrig, cSum,saturate( saturate( cSum.w*nRecipSamples)*2));
#endif
  }

  OUT.Color.w = cSum.w * nRecipSamples;

  return OUT;
}

// Camera motion blur (LDR path).
// Averages 8 samples of screenMapSampler along the line between the current
// and the previous-frame screen position of this fragment (plus
// vDirectionalBlur). At high quality each sample position is pulled back
// towards the start according to the depth mask stored in the screen map
// alpha. The result is blended in by saturate(depth - 1) so the nearest
// geometry stays sharp.
pixout MotionBlurDisplPS(vtxOutMotionBlurDispl IN)
{
  pixout OUT = (pixout)0;

  int nQuality = GetShaderQuality();

  half4 cMidCurr = tex2Dproj(screenMapSampler, IN.tcProj.xyzw);
  half fDepth = GetDepthMapScaledProj(depthMapSampler, IN.tcProj.xyzw);

  float fSamples = 8.0;

  const float fWeight = (1.0 / fSamples);
  const float fWeightStep = (2.0 / fSamples);

  float fBlurAmount = PI_motionBlurParams.w;

  // Perspective divide of both screen positions
  float2 vPrevScaled = ( (IN.vVelocityPrev.xy/IN.vVelocityPrev.w))* fBlurAmount;
  float2 vCurrTC = (IN.vVelocity.xy/IN.vVelocity.w);
  float2 vCurrScaled = vCurrTC * fBlurAmount;

  // Extra user-controlled blur direction
  float2 vDirOffset = vDirectionalBlur.xy * fBlurAmount;

  float2 vStartTC = vCurrTC + vDirOffset;
  float2 vDelta = (vPrevScaled - vCurrScaled) + vDirOffset;

  float4 cSum = 0;

#if D3D10
  [unroll]
#endif
  for(float s = -1.0; s < 1.0 ; s += fWeightStep )
  {
    float2 tcFinal =  vStartTC.xy - vDelta.xy * s;

    if( nQuality == QUALITY_HIGH )
    {
      // Mask 1 keeps the full displacement, mask 0 cancels it
      half fDepthMask = tex2D(screenMapSampler, tcFinal).w;
      tcFinal +=  vDelta.xy * (s - s * fDepthMask);
    }

    cSum += tex2D(screenMapSampler, tcFinal );
  }

  cSum *= fWeight;

  // Remove scene bleeding from 1st player hands
  OUT.Color = lerp(cMidCurr, cSum, saturate(fDepth-1.0) );

  return OUT;
}


// Camera motion blur (HDR path, uses early-outs).
// _tex0 = scene colour with depth mask in alpha, _tex1 = depth.
// Early-outs: (with %_RT_SAMPLE0) mask below 0.05; scaled depth <= 1.
// Sample count is 8, or 4 with %_RT_SAMPLE1.
//   %_RT_SAMPLE0 : cheap masking - each sample is lerped against the centre
//                  colour by its own alpha, then averaged.
//   otherwise    : only samples with non-zero mask contribute; their position
//                  is corrected by the mask and the result is divided by the
//                  number of contributing samples.
pixout MotionBlurDisplHDRPS(vtxOutMotionBlurDispl IN)
{
  pixout OUT = (pixout)0;

  int nQuality = GetShaderQuality();

  half4 cMidCurr = tex2Dproj(_tex0, IN.tcProj.xyzw);
  float fDepth = tex2Dproj(_tex1, IN.tcProj.xyzw).x * PS_NearFarClipDist.y;

  // Default result: unblurred scene
  OUT.Color = cMidCurr;

  // skip below min threshold (usually sky and nearby geometry) with slow movement
#if %_RT_SAMPLE0
  const float fMinDepthMaskThreshold = 0.05;

  // this is not 100% correct since still needed to sample faraway pixels
  // but for 1st pass is ok - artefacts mostly noticeable with fast camera movement
  if( cMidCurr.w < fMinDepthMaskThreshold )   // saves about 1 ms
    return OUT;
#endif

  // skip nearby geometry with fast movement
  if( fDepth - 1.0f <= 0.0f)
    return OUT;

  // Perspective divide of both screen positions
  half2 vPrevScaled = ( (IN.vVelocityPrev.xy/IN.vVelocityPrev.w))* PI_motionBlurParams.w;

  half2 vCurrScaled = (IN.vVelocity.xy/IN.vVelocity.w);
  half2 vStartTC = vCurrScaled;
  vCurrScaled *= PI_motionBlurParams.w;

  half2 vDelta = vPrevScaled - vCurrScaled;

  // Extra user-controlled blur direction
  vStartTC.xy += vDirectionalBlur.xy *  PI_motionBlurParams.w;
  vDelta.xy += vDirectionalBlur.xy *  PI_motionBlurParams.w;

  half4 cSum = 0;

  half fSamples = 8.0;

#if %_RT_SAMPLE1
  fSamples = 4.0;
#endif

  const half fWeightStep = (2.0 / fSamples);

#if %_RT_SAMPLE0

  // use-lower quality masking for first pass

  const half fWeight = (1.0 / fSamples);

#if D3D10
  [unroll]
#endif
  for(half s = -1.0; s < 1.0 ; s += fWeightStep )
  {
    half2 tcFinal =  vStartTC.xy - vDelta.xy * s;
    half4 cTap = tex2Dlod(_tex0, float4(tcFinal.xy, 0, 0) );
    cSum += lerp(cMidCurr, cTap, cTap.w );
  }

  cSum *= fWeight;
  OUT.Color = cSum;

#else

  int nContributing = 0;

#if D3D10
  [unroll]
#endif
  for(half s = -1.0; s < 1.0 ; s += fWeightStep )
  {
    half2 tcFinal =  vStartTC.xy - vDelta.xy * s;
    half fDepthMask = tex2Dlod(_tex0, float4(tcFinal.xy, 0, 0)).w;
    if( fDepthMask )
    {
      // Mask 1 keeps the full displacement, mask 0 cancels it
      tcFinal +=  vDelta.xy * (s - s * fDepthMask);
      cSum += tex2Dlod(_tex0, float4(tcFinal.xy, 0, 0) );
      nContributing++;
    }
  }

  // Remove scene bleeding from 1st player hands
  if( nContributing )
  {
    cSum /= (half) nContributing ;
    OUT.Color = cSum;
  }

#endif

  return OUT;
}

////////////////// technique /////////////////////

// Single-pass technique: BaseVS + MotionBlurdDepthMaskPS, culling disabled.
// NOTE(review): the pixel shader name is spelled "MotionBlurdDepthMaskPS"; its
// definition is outside this part of the file - confirm the spelling matches.
technique MotionBlurMaskGen
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS MotionBlurdDepthMaskPS();
    CullMode = None;        
  }
}

// Single-pass technique: BaseVS + MotionBlurDepthMaskHDRPS, culling disabled.
technique MotionBlurMaskGenHDR
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS MotionBlurDepthMaskHDRPS();
    CullMode = None;        
  }
}

// Single-pass technique: MotionBlurDisplVS + MotionBlurDisplPS, culling disabled.
technique MotionBlurDispl
{
  pass p0
  {
    VertexShader = CompileVS MotionBlurDisplVS();
    PixelShader = CompilePS MotionBlurDisplPS();
    CullMode = None;        
  }
}

// Single-pass technique: BaseVS + OMB_OffsetMapPS, culling disabled.
technique OMB_OffsetMap
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS OMB_OffsetMapPS();
    CullMode = None;        
  }
}

// Single-pass technique: BaseVS + OMB_CopyAlphaIDPS, culling disabled.
technique OMB_CopyAlphaID
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS OMB_CopyAlphaIDPS();
    CullMode = None;        
  }
}


#if %DYN_BRANCHING_POSTPROCESS

// Single-pass technique: MotionBlurDisplVS + MotionBlurDisplHDRPS, culling disabled.
// Only compiled when %DYN_BRANCHING_POSTPROCESS is set.
technique MotionBlurDisplHDR
{
  pass p0
  {
    // The dedicated MotionBlurDisplHDRVS is disabled; the non-HDR vertex shader is used.
    VertexShader = CompileVS MotionBlurDisplVS();//MotionBlurDisplHDRVS();
    PixelShader = CompilePS MotionBlurDisplHDRPS();
    CullMode = None;        
  }
}

// Single-pass technique: BaseVS + MotionBlurObjectPS, culling disabled.
// Only compiled when %DYN_BRANCHING_POSTPROCESS is set.
technique MotionBlurObject
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS MotionBlurObjectPS();
    CullMode = None;      
  }
}

// Single-pass technique: BaseVS + MotionBlurObjectUsingMaskPS, culling disabled.
// Only compiled when %DYN_BRANCHING_POSTPROCESS is set.
technique MotionBlurObjectUsingMask
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS MotionBlurObjectUsingMaskPS();
    CullMode = None;      
  }
}

// Single-pass technique: BaseVS + MotionBlurObjectMaskPS, culling disabled.
// Only compiled when %DYN_BRANCHING_POSTPROCESS is set.
technique MotionBlurObjectMask
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS MotionBlurObjectMaskPS();
    CullMode = None;      
  }
}

// Single-pass technique: BaseVS + OMB_VelocityIDRescalePS, culling disabled.
// Only compiled when %DYN_BRANCHING_POSTPROCESS is set.
technique OMB_VelocityIDRescale
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS OMB_VelocityIDRescalePS();
    CullMode = None;      
  }
}

// Single-pass technique: BaseVS + OMB_VelocityDilationPS, culling disabled.
// Only compiled when %DYN_BRANCHING_POSTPROCESS is set.
technique OMB_VelocityDilation
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS OMB_VelocityDilationPS();
    CullMode = None;      
  }
}

// Single-pass technique: BaseVS + OMB_UsingVelocityDilationPS, culling disabled.
// Only compiled when %DYN_BRANCHING_POSTPROCESS is set.
technique OMB_UsingVelocityDilation
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS OMB_UsingVelocityDilationPS();
    CullMode = None;      
  }
}

#endif

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Depth of field technique ///////////////////////////////////////////////////////////////////////

/// Specific data ////////////////////////

// Compile-time switches for the depth of field shaders.
// DOF_ANAMORPHIC_LENS: when 1, DofHDRPS scales its tap offsets by half2(1.333, 2.4) / 2.4.
#define DOF_ANAMORPHIC_LENS 0
// NOTE(review): DOF_OPTICAL_SIMULATION is not referenced in the nearby DOF shaders - confirm it is still used.
#define DOF_OPTICAL_SIMULATION 1
// DOF_USE_NOISE: when 1, DofHDRPS jitters its tap offsets with a noise value read from _tex2.
#define DOF_USE_NOISE 0

/// Constants ////////////////////////////

// Focus parameters. GetDepthBluriness uses z as the focus depth and x / y as the
// divisors in front of / behind it; GetDepthBlurinessBiased uses x as the near limit,
// y as the far start and z as the far range. w scales the resulting blur amount and
// the tap spacing in GetDepthBlurred.
float4 dofParamsFocus;
// Blur disc radius is computed as centerDepth * dofParamsBlur.y - dofParamsBlur.x.
float4 dofParamsBlur;
// Tap offset scales: xy for the _tex0 taps, zw for the _tex1 taps (see DofPS / DofHDRPS).
float4 pixelSizes;
// Extra scale applied to the disc radius used for the _tex1 taps.
float radiusScale = 0.4;
float dofMinThreshold = 0.2;//0.5; // to ensure a smoother transition between near focus plane and focused area

/// Samplers /////////////////////////////

///////////////// vertex shader //////////////////

///////////////// pixel shader //////////////////
// Returns a weighted average of the depth around baseTC, never letting a
// neighbour contribute more than depthOrig.
//   _tex0      - depth source passed to GetDepthMapScaled
//   baseTC     - centre texture coordinate
//   depthOrig  - depth at the centre; also the upper limit for each neighbour pair
//   blurAmount - not used by this function
// Seven tap pairs are taken at +/- k * step (k = 1..7) along the diagonal, where
// step is four texels scaled by dofParamsFocus.w. Pair k has weight 1 - k/8; the
// centre has weight 1.
float GetDepthBlurred( sampler2D _tex0, half2 baseTC, float depthOrig, half blurAmount )
{
  const half2 vTapStep = PS_ScreenSize.zw * 4.h * dofParamsFocus.w;

  float fDepthAccum = depthOrig;
  float fWeightAccum = 1.0;

#if D3D10
  [unroll]
#endif
  for(int nTap = 1; nTap < 8; nTap++)
  {
    // Average of the two mirrored taps.
    float fDepthFwd = GetDepthMapScaled(_tex0, baseTC.xy + nTap * vTapStep);
    float fDepthBack = GetDepthMapScaled(_tex0, baseTC.xy - nTap * vTapStep);
    float fPairDepth = (fDepthFwd + fDepthBack) * 0.5;

    // Keep whichever is smaller: the pair average or the centre depth.
    float fLimitedDepth = (fPairDepth < depthOrig) ? fPairDepth : depthOrig;

    float fTapWeight = lerp(1, 0, (float)nTap / 8.f);
    fDepthAccum += fLimitedDepth * fTapWeight;
    fWeightAccum += fTapWeight;
  }

  // The small epsilon guards the division.
  return fDepthAccum / (fWeightAccum + 1e-6);
}

// Maps a depth value to a blur factor using separate near and far ranges.
//   fDepth > dofParamsFocus.y  : (fDepth - y) / z, clamped to [0, 1 - dofMinThreshold]
//   fDepth <= dofParamsFocus.x : (1 - fDepth / x) / w
//   otherwise                  : 0 (in focus)
half GetDepthBlurinessBiased(half fDepth)
{
  const half fNearLimit = (half)dofParamsFocus.x;
  const half fFarStart = (half)dofParamsFocus.y;

  half fBlur = 0;

  if( fDepth > fFarStart )
  {
    // Behind the focus region: ramp over the far range, capped below 1.
    const half fFarRange = (half)dofParamsFocus.z;
    fBlur = (fDepth - fFarStart) / fFarRange;
    fBlur = clamp(fBlur, 0, 1 - (half)dofMinThreshold);
  }
  else if( fDepth <= fNearLimit )
  {
    // In front of the focus region.
    fBlur = (1 - fDepth / dofParamsFocus.x) / dofParamsFocus.w;
  }

  return fBlur;
}

// Writes the biased depth-of-field blur factor, remapped to depth * 0.5 + 0.5.
// The factor is the larger of the value computed from the pixel's own depth and
// the value computed from the neighbourhood depth returned by GetDepthBlurred.
// With %_RT_SAMPLE0 the factor goes to alpha and the colour from _tex1 to rgb;
// otherwise the factor is written to every channel.
pixout CopyDepthToAlphaBiasedNoMaskPS(vtxOut IN)
{
  pixout OUT;

  half fSceneDepth = GetDepthMap(_tex0, IN.baseTC.xy);
  half fDepthScaled = fSceneDepth.x * PS_NearFarClipDist.y;

  // Blur factor from the pixel's own depth.
  half fBlurSharp = (GetDepthBlurinessBiased(fDepthScaled)) * dofParamsFocus.w;

  // Blur factor from the blurred depth.
  half fDepthSoft = GetDepthBlurred(_tex0, IN.baseTC.xy, fDepthScaled, dofParamsFocus.w).x;
  half fBlurSoft = (GetDepthBlurinessBiased(fDepthSoft)) * dofParamsFocus.w;

  half fBlurFinal = fBlurSharp;
  if( fBlurSoft >= fBlurSharp )
    fBlurFinal = fBlurSoft;

#if %_RT_SAMPLE0
  half3 cSceneRGB = max( tex2D(_tex1, IN.baseTC.xy).xyz, 0);

  // do same nan check as in hdr pass
  cSceneRGB.rgb = (cSceneRGB.rgb> 10000.0f)? half3(1, 1, 1): cSceneRGB.rgb;

  OUT.Color.xyzw = half4( cSceneRGB.xyz, (fBlurFinal*0.5+0.5) );
#else
  OUT.Color = (fBlurFinal*0.5+0.5);
#endif

  return OUT;
}

// Masked variant of the biased blur-factor pass. Both blur factors are
// multiplied by the mask read from _tex1.x before the larger one is kept and
// remapped to depth * 0.5 + 0.5.
// With %_RT_SAMPLE0 the factor goes to alpha and the colour from _tex2 to rgb;
// otherwise the factor is written to every channel.
pixout CopyDepthToAlphaBiasedPS(vtxOut IN)
{
  pixout OUT;

  half fSceneDepth = GetDepthMap(_tex0, IN.baseTC.xy);
  half fMask = tex2D(_tex1, IN.baseTC.xy).x;

  half fDepthScaled = fSceneDepth.x * PS_NearFarClipDist.y;

  // Blur factor from the pixel's own depth.
  half fBlurSharp = (GetDepthBlurinessBiased(fDepthScaled) * fMask) * dofParamsFocus.w;

  // Blur factor from the blurred depth.
  half fDepthSoft = GetDepthBlurred(_tex0, IN.baseTC.xy, fDepthScaled, dofParamsFocus.w).x;
  half fBlurSoft = (GetDepthBlurinessBiased(fDepthSoft) * fMask) * dofParamsFocus.w;

  half fBlurFinal = fBlurSharp;
  if( fBlurSoft >= fBlurSharp )
    fBlurFinal = fBlurSoft;

#if %_RT_SAMPLE0
  half3 cSceneRGB = max( tex2D(_tex2, IN.baseTC.xy).xyz, 0);

  // do same nan check as in hdr pass
  cSceneRGB.rgb = (cSceneRGB.rgb> 10000.0f)? half3(1, 1, 1): cSceneRGB.rgb;

  OUT.Color.xyzw = half4( cSceneRGB.xyz, (fBlurFinal*0.5+0.5) );
#else
  OUT.Color = (fBlurFinal*0.5+0.5);
#endif

  return OUT;
}

// Maps a depth value to a signed blur factor around the focus depth dofParamsFocus.z.
//   fDepth <  z : (fDepth - z) / dofParamsFocus.x   (negative, not clamped here)
//   fDepth >= z : (fDepth - z) / dofParamsFocus.y, clamped to [0, 1 - dofMinThreshold]
// 0 means in focus; 1 or -1 means completely out of focus.
half GetDepthBluriness(half fDepth)
{
  const half fFocusDepth = (half)dofParamsFocus.z;

  half fBlur = fDepth - fFocusDepth;

  if( fDepth < fFocusDepth )
  {
    // In front of the focus depth.
    fBlur /= (half)dofParamsFocus.x;
  }
  else
  {
    // At or behind the focus depth.
    fBlur /= (half)dofParamsFocus.y;
    fBlur = clamp(fBlur, 0, 1 - (half)dofMinThreshold);
  }

  return fBlur;
}

// Writes the depth-of-field blur factor, remapped to depth * 0.5 + 0.5.
// The factor is saturate(GetDepthBluriness(...)) * dofParamsFocus.w, evaluated for
// the pixel's own depth and for the blurred depth; the larger value is kept.
// With %_RT_SAMPLE0 the factor goes to alpha and the colour from _tex1 to rgb;
// otherwise the factor is written to every channel.
pixout CopyDepthToAlphaNoMaskPS(vtxOut IN)
{
  pixout OUT;

  half fSceneDepth = GetDepthMap(_tex0, IN.baseTC.xy);
  half fDepthScaled = fSceneDepth.x * PS_NearFarClipDist.y;

  // Blur factor from the pixel's own depth.
  half fBlurSharp = saturate(GetDepthBluriness(fDepthScaled)) * dofParamsFocus.w;

  // Blur factor from the blurred depth.
  half fDepthSoft = GetDepthBlurred(_tex0, IN.baseTC.xy, fDepthScaled, dofParamsFocus.w).x;
  half fBlurSoft = saturate(GetDepthBluriness(fDepthSoft)) * dofParamsFocus.w;

  half fBlurFinal = fBlurSharp;
  if( fBlurSoft >= fBlurSharp )
    fBlurFinal = fBlurSoft;

#if %_RT_SAMPLE0
  half3 cSceneRGB = max( tex2D(_tex1, IN.baseTC.xy).xyz, 0);

  // do same nan check as in hdr pass
  cSceneRGB.rgb = (cSceneRGB.rgb> 10000.0f)? half3(1, 1, 1): cSceneRGB.rgb;

  OUT.Color.xyzw = half4( cSceneRGB.xyz, (fBlurFinal*0.5+0.5) );
#else
  OUT.Color = (fBlurFinal*0.5+0.5);
#endif

  return OUT;
}

// Masked variant of the blur-factor pass. With mask m read from _tex1.x the
// factor is saturate(GetDepthBluriness(depth) * m + m) * dofParamsFocus.w,
// evaluated for the pixel's own depth and for the blurred depth; the larger
// value is kept and remapped to depth * 0.5 + 0.5.
// With %_RT_SAMPLE0 the factor goes to alpha and the colour from _tex2 to rgb;
// otherwise the factor is written to every channel.
pixout CopyDepthToAlphaPS(vtxOut IN)
{
  pixout OUT;

  half fSceneDepth = GetDepthMap(_tex0, IN.baseTC.xy);
  half fMask = tex2D(_tex1, IN.baseTC.xy).x;

  half fDepthScaled = fSceneDepth.x * PS_NearFarClipDist.y;

  // Blur factor from the pixel's own depth.
  half fBlurSharp = saturate((GetDepthBluriness(fDepthScaled)) * fMask + fMask) * dofParamsFocus.w;

  // Blur factor from the blurred depth.
  half fDepthSoft = GetDepthBlurred(_tex0, IN.baseTC.xy, fDepthScaled, dofParamsFocus.w).x;
  half fBlurSoft = saturate((GetDepthBluriness(fDepthSoft)) * fMask + fMask) * dofParamsFocus.w;

  half fBlurFinal = fBlurSharp;
  if( fBlurSoft >= fBlurSharp )
    fBlurFinal = fBlurSoft;

#if %_RT_SAMPLE0
  half3 cSceneRGB = max( tex2D(_tex2, IN.baseTC.xy).xyz, 0);

  // do same nan check as in hdr pass
  cSceneRGB.rgb = (cSceneRGB.rgb> 10000.0f)? half3(1, 1, 1): cSceneRGB.rgb;

  OUT.Color.xyzw = half4( cSceneRGB.xyz, (fBlurFinal*0.5+0.5) );
#else
  OUT.Color = (fBlurFinal*0.5+0.5);
#endif

  return OUT;
}

// Blends three colour layers according to centerDepth (d).
// Raw weights are saturate(1 - 2d), saturate(3 - 4d), saturate(4 - 4d) and
// saturate(4d - 3); the second and third are then limited to one minus the
// preceding weight. rgb = wHigh * tapHigh + wMed * tapMed + wLow * tapLow, and
// alpha is the sum of those three weights with wHigh scaled by 16/17.
half4 DOFMergeLayers(half4 tapLow, half4 tapMed, half4 tapHigh, half centerDepth)
{
  half4 vLayerW = saturate( centerDepth * half4( -2, -4, -4, 4 ) + half4( 1, 3, 4, -3) );
  vLayerW.yz = min( vLayerW.yz, 1 - vLayerW.xy );

  // y weights the high layer, z the medium layer, w the low layer.
  half3 cBlend = vLayerW.y * tapHigh.xyz + vLayerW.z * tapMed.xyz + vLayerW.w * tapLow.xyz;
  half fCoverage = dot( vLayerW.yzw, half3(16.0f / 17.0f, 1.0f, 1.0f) );

  return half4( cBlend, fCoverage );
}

// Depth of field pixel shader using a fixed 37-tap pattern.
//
// _tex0 is sampled as the full colour layer and _tex1 as the second layer whose
// alpha holds the blur factor (centerDepth). Pixels whose centerDepth is below
// 0.01 return the _tex0 colour unchanged. Otherwise every tap offset is rotated
// by a matrix derived from the screen position, both layers are sampled and
// merged with DOFMergeLayers, and the result is a weighted average where the
// weight combines a leak-reduction term with the per-tap aberration intensity.
pixout DofHDRPS(vtxOut IN)
{
  pixout OUT;
  
  // NOTE(review): nQuality is not read anywhere in this function.
  int nQuality = GetShaderQuality();

  const int tapCount = 37;
  // xy = poisson coordinates, z = aberration intensity
  float3 poisson[37] =
  {		
		 0.0,   0.0,  1.0,		 
		-1.0,   0.0,  1.0,
		-2.0,   0.0,  1.0,
		-3.0,   0.0,  1.5,
		 3.0,   0.0,  1.5,
		 2.0,   0.0,  1.0,
		 1.0,   0.0,  1.0, // 7
		 0.0,   1.0,  1.0,
		 0.0,   2.0,  1.0,
		 0.0,   3.0,  8.0,
	 	 0.0,  -3.0,  1.5,
		 0.0,  -2.0,  1.0,
		 0.0,  -1.0,  1.0, // 13
		-0.75,  0.75, 1.0,
		-1.75,  1.0,  1.0,
		-2.75,  1.0,  2.0,
		 2.75,  1.0,  2.0,
		 1.75,  1.0,  1.0,
		 0.75,  0.75, 1.0, // 19
		-0.75, -0.75, 1.0,
		-1.75, -1.0,  1.0,
		-2.75, -1.0,  1.5,
		 2.75, -1.0,  1.5,
		 1.75, -1.0,  1.0,
		 0.75, -0.75, 1.0, // 25
		-2.0,   2.0,  6.0,	 
		-2.0,  -2.0,  1.5,
		-1.0,  -1.75, 1.0,
		 1.0,  -1.75, 1.0,
		 2.0,  -2.0,  1.5,
		 2.0,   2.0,  6.0, // 31
		-1.0,   1.75, 1.0,
		-1.0,   2.75, 8.0,
		 1.0,   2.75, 8.0,
		-1.0,  -2.75, 1.5,
		 1.0,  -2.75, 1.5,
		 1.0,   1.75, 1.0, // 37
  };
    
  // Magnify image based on focus distance.
  //float fMagMask = tex2D(_tex0, float2(0.0, 0.0)).a*2-1;
  //if(fMagMask)
	//IN.baseTC.xy = (IN.baseTC.xy - 0.5) * lerp(0.9875f, 1.0f, saturate(dofParamsFocus.z * 0.1)) + 0.5;
  
  // Initial scene.
  OUT.Color = tex2D(_tex0, IN.baseTC.xy);

  // fetch center tap from blurred low res image
  float centerDepth = tex2D(_tex1, IN.baseTC.xy).a; 
  //float centerDepth = tex2D(_tex0, IN.baseTC.xy).a; 
    
  // Early out if there's no visual difference.
  if(centerDepth < 0.01)
  {		
	  return OUT;
  }
                   
  // Optional per-pixel jitter of the tap offsets; stays zero while DOF_USE_NOISE is 0.
  float2 vNoise = 0.0;
  if(DOF_USE_NOISE == 1)
  {
	  float2 vNoiseTC = IN.baseTC.xy * PS_ScreenSize.xy / 64.0;
	  vNoise = tex2Dlod(_tex2, float4(vNoiseTC, 0, 0)) + dot(IN.baseTC.xy, 1) * 65535;
	  vNoise = frac( vNoise );
	  vNoise = vNoise*2-1;
	  vNoise *= 0.05;
  }
      
  // Calculate aspect ratio.
  half2 fAspectRatio = 1.0f;
  if(DOF_ANAMORPHIC_LENS == 1)
	  fAspectRatio = half2(1.333f, 2.4f) / 2.4f;

  // Calculate radius.
  half discRadius=(centerDepth*(half)dofParamsBlur.y-(half)dofParamsBlur.x);
  
  // xy scales the _tex0 tap offsets, zw the _tex1 tap offsets (further scaled by radiusScale).
  half4 texSizes = pixelSizes.xyzw * discRadius * fAspectRatio.xyxy;
  texSizes.zw *= (half)radiusScale;

  // Calculate vignette: zero inside radius 0.5 (in -1..1 screen space), growing towards the corners.
  float2 coordN = IN.baseTC.xy * 2.0 - 1.0;
  float vignette = saturate(dot(coordN, coordN)-0.25);
    
  // Rotation masks: per-axis absolute value of coordN, limited to 1.
  float2 vignetteMask = saturate(coordN) + saturate(-coordN);
  
  // Create a rotation matrix based on screen coordinates.
  float2x2 rotationMatrixX = RotationMatrix((-coordN.x * (PI*0.5)) * vignetteMask.x);
  float2x2 rotationMatrixY = RotationMatrix(((1-IN.baseTC.y) * PI) * vignetteMask.y);
  float2x2 rotationMatrix = mul(rotationMatrixY, rotationMatrixX);

  // Go through samples and sum.
  half4 cOut = 0;
  half4 cSumWeights = 0;
  
#if D3D10
  [unroll]
#endif
  for(int t=0; t<tapCount; t++)
  { 
	  //poisson[t].xy = float2( poisson[t].x * 0.866 - poisson[t].y * 0.5, poisson[t].x * 0.5 + poisson[t].y * 0.866 ); // 30 deg rot
	  //poisson[t].xy = 0.707 * poisson[t].xy + 0.707 * float2( - poisson[t].y, poisson[t].x); // 45 deg rot

	  // Rotate poisson using matrix.
	  poisson[t].xy = mul(rotationMatrix, poisson[t].xy);
				
	  // Scale aberration outwards of the screen for aberration vignetting.
	  poisson[t].z = lerp(1.0, poisson[t].z, vignette);
		  
	  // xy = tap coordinate for _tex0, zw = tap coordinate for _tex1.
	  float4 tapCoord = IN.baseTC.xyxy + (poisson[t].xyxy + vNoise.xyxy) * texSizes.xyzw;

	  half4 tapHigh = tex2Dlod(_tex0, float4(tapCoord.xy, 0, 0));
	  half4 tapLow = tex2Dlod(_tex1, float4(tapCoord.zw, 0, 0));
		 	      	  
	  //half tapLerp = (tapHigh.a * 2.0 - 1.0);        
	  //half4 tap = lerp(tapHigh, tapLow, saturate(tapLerp));    
	  // tapLow is passed for both the low and the medium layer.
	  half4 tap = DOFMergeLayers(tapLow, tapLow, tapHigh, centerDepth);
	  // NOTE(review): tapA is not read after this assignment.
	  half tapA = tap.a;
		    
	  // Apply leak reduction. Make sure only to reduce on focused areas            
	  tap.a = (tapLow.a - centerDepth + (half)dofMinThreshold > 0.0) ? 1 : saturate(tap.a * 2.0 - 1.0);    
	  //tap.a = (tapHigh.a - centerDepth + (half)dofMinThreshold > 0.0) ? 1 : saturate(tapHigh.a * 2.0 - 1.0);    
   
   
	  // High-res only.
	  //float2 tapCoord = IN.baseTC.xy + (poisson[t].xy + vNoise.xy) * texSizes.xy;
	  //half4 tap = tex2Dlod(_tex0, float4(tapCoord.xy, 0, 0));
	      
	  //tap.a = (tap.a - centerDepth + (half)dofMinThreshold > 0.0) ? 1 : saturate(tap.a * 2.0 - 1.0);    
		     
	  // Per-tap tint: the aberration intensity on every channel, with the first
	  // channel halved when the intensity exceeds 1.
	  half4 bokehColor = poisson[t].z;
	  if(bokehColor.x > 1.0)
		bokehColor *= half4(0.5, 1.0, 1.0, 1.0);
		             
	  cOut += tap.a * tap * bokehColor;
	  cSumWeights += tap.a * bokehColor;
  }
   
  // Normalize; the epsilon guards against a zero weight sum.
  OUT.Color = cOut/(cSumWeights + 1e-6);
		  		
  return OUT;
}

// Depth of field pixel shader using an 8-tap poisson pattern.
//
// _tex0 is sampled as the full colour layer and _tex1 as the second layer whose
// alpha holds the blur factor for the centre pixel. Each tap blends the two
// layers by the alpha of the _tex0 tap, is squared for linear-space blending,
// and is weighted by a leak-reduction term. The weighted average is converted
// back with a square root.
//
// The tap scales are kept in local variables; the global pixelSizes constant
// is only read, never written.
pixout DofPS(vtxOut IN)
{
  pixout OUT;

  const int tapCount = 8;

  float2 poisson[8] =
  {
       0.0,    0.0,
     0.527, -0.085,
    -0.040,  0.536,
    -0.670, -0.179,
    -0.419, -0.616,
     0.440, -0.639,
    -0.757,  0.349,
     0.574,  0.685,
  };

  half4 cOut=0;

#if D3D10
  // temporary workaround for d3d10 hlsl compiler bug
  OUT.Color = tex2D(_tex0, IN.baseTC.xy);
#endif

  // fetch center tap from blurred low res image
  half centerDepth = tex2D(_tex1, IN.baseTC.xy).w;

  // Disc radius for the _tex0 taps, and a reduced radius for the _tex1 taps.
  half discRadius = (centerDepth*(half)dofParamsBlur.y-(half)dofParamsBlur.x);
  half discRadiusLow = discRadius*(half)radiusScale;

  // Per-layer tap offset scales (pixelSizes.xy for _tex0, pixelSizes.zw for _tex1).
  half2 tapScaleHigh = (half2)pixelSizes.xy*discRadius;
  half2 tapScaleLow = (half2)pixelSizes.zw*discRadiusLow;

#if D3D10
  [unroll]
#endif
  for(int t=0; t<tapCount; t++)
  {
    half4 tapHigh=tex2D(_tex0, IN.baseTC.xy+ poisson[t]*tapScaleHigh);
    half4 tapLow=tex2D(_tex1, IN.baseTC.xy+ poisson[t]*tapScaleLow);

    // Gamma correct (for linear-space blending)
    tapHigh.rgb *= tapHigh.rgb;
    tapLow.rgb *= tapLow.rgb;

    // Blend towards the _tex1 tap as the _tex0 tap's alpha rises above 0.5.
    half tapLerp=(tapHigh.a*2.0-1.0);
    half4 tap=lerp(tapHigh, tapLow, saturate(tapLerp));

    // Apply leak reduction. Make sure only to reduce on focused areas
    tap.a=(tapLow.a-centerDepth+(half)dofMinThreshold>0.0)? 1: saturate(tap.a*2.0-1.0);

    cOut.xyz+=tap.a*tap.xyz;
    cOut.w+=tap.a;
  }

  // Normalize by the accumulated weight.
  OUT.Color = cOut/cOut.w;

  // Gamma correct (for linear-space blending)
  OUT.Color.rgb = sqrt(OUT.Color.rgb);

  return OUT;
}

////////////////// technique /////////////////////

// Single-pass technique: BaseVS + CopyDepthToAlphaNoMaskPS, culling disabled.
technique CopyDepthToAlphaNoMask
{
  pass p0
  {        
    CullMode = None;        

         
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS CopyDepthToAlphaNoMaskPS();    
  }
}

// Single-pass technique: BaseVS + CopyDepthToAlphaBiasedNoMaskPS, culling disabled.
technique CopyDepthToAlphaBiasedNoMask
{
  pass p0
  {        
    CullMode = None;        
            
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS CopyDepthToAlphaBiasedNoMaskPS();    

  }
}

// Single-pass technique: BaseVS + CopyDepthToAlphaPS, culling disabled.
technique CopyDepthToAlpha
{
  pass p0
  {        
    CullMode = None;        

    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS CopyDepthToAlphaPS();    

  }
}

// Single-pass technique: BaseVS + CopyDepthToAlphaBiasedPS, culling disabled.
technique CopyDepthToAlphaBiased
{
  pass p0
  {        
    CullMode = None;        
            
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS CopyDepthToAlphaBiasedPS();    

  }
}

// Single-pass technique: BaseVS + DofPS (8-tap depth of field), culling disabled.
technique DepthOfField
{
  pass p0
  {        
    CullMode = None;        
    
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS DofPS();    
  }
}

#if %DYN_BRANCHING_POSTPROCESS

// Single-pass technique: BaseVS + DofHDRPS (37-tap depth of field), culling disabled.
// Only compiled when %DYN_BRANCHING_POSTPROCESS is set.
technique DepthOfFieldHDR
{
  pass p0
  {        
    CullMode = None;        
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS DofHDRPS();
  }
}

#endif

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Perspective Warp      //////////////////////////////////////////////////////////////////////////

// Vertex input for PerspectiveWarpVS. Only pos.xy is read by the shader.
struct a2v_perspectiveWarp
{
  float4 pos : POSITION;
};

// Vertex-to-pixel data for the perspective warp pass.
struct v2f_perspectiveWarp
{
  float4 pos : POSITION;   // output position (input xy with z = 0, w = 1)
  float2 tex : TEXCOORDN;  // warped screen-map coordinate sampled by PerspectiveWarpPS
};

// Vertex shader for the perspective warp pass.
// Passes the input xy position through unchanged and computes a warped texture
// coordinate from it, using a fixed base field of view of 60 degrees, a fixed
// target field of view of 90 degrees and a fixed aspect ratio of 1.3333.
v2f_perspectiveWarp PerspectiveWarpVS( a2v_perspectiveWarp IN )
{
  v2f_perspectiveWarp OUT = (v2f_perspectiveWarp) 0;

  const float fPi = 3.141592;
  const float fAspect = 1.3333;

  // Horizontal / vertical field of view in degrees (x, y).
  const float2 vBaseFov = float2( 60.0, 60.0 / fAspect );
  const float2 vWideFov = float2( 90.0, 90.0 / fAspect );

  const float2 vScale = 1.0 - ( vBaseFov / vWideFov );
  const float2 vRatio = ( vWideFov / vBaseFov ) / fPi;

  // Input position with the vertical axis flipped.
  const float2 vPos = float2( IN.pos.x, -IN.pos.y );

  // View angle of this vertex, converted from degrees to radians.
  const float2 vAngleRad = vPos * vWideFov * 0.5 * fPi / 180.0;

  float2 vWarped = vRatio * asin( vScale * vPos );
  vWarped /= cos( vAngleRad );
  vWarped += (1.0 - vScale) * sin( vAngleRad );

  OUT.pos = float4( IN.pos.xy, 0, 1 );
  // Remap from -1..1 to 0..1.
  OUT.tex = vWarped * 0.5 + 0.5;

  return OUT;
}

// Pixel shader for the perspective warp pass: outputs the screen map sampled
// at the warped coordinate produced by the vertex shader.
pixout PerspectiveWarpPS( v2f_perspectiveWarp IN )
{
  pixout OUT;

  float4 cWarped = tex2D( screenMapSampler, IN.tex.xy );
  OUT.Color = cWarped;

  return OUT;
}

// Single-pass technique: PerspectiveWarpVS + PerspectiveWarpPS, culling disabled.
technique PerspectiveWarp
{
  pass p0
  {
    VertexShader = CompileVS PerspectiveWarpVS();       
    PixelShader = CompilePS PerspectiveWarpPS();
    CullMode = None;
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Glittering techniques//////////////////////////////////////////////////////////////////////////

/// Specific data ////////////////////////

/// Constants ////////////////////////////

// Glitter tuning values read by glitterSpriteVS: x scales the glint frequency,
// y is the exponent of the view-dependency term, w scales the sprite size
// (clamped to 0..2). z is not read by the glitter shaders defined below.
float4 glitterParams;
// NOTE(review): glitterSprParams is not referenced by the glitter shaders defined below.
float4 glitterSprParams;

// Camera up / right vectors used to expand each sprite so that it faces the camera.
float4 camUpVector;
float4 camRightVector;

/// Samplers ////////////////////////////

// Glitter maps added on top of the scene by glitteringPassPS, bound to
// sampler registers s1 and s2.
sampler2D glitterScaledMap_d2 : register(s1);
sampler2D glitterScaledMap_d4 : register(s2);

// Sprite texture sampled by glitterSpritePS: linear filtering, clamped addressing.
sampler2D glitterSpriteSampler = sampler_state
{
  Texture = textures/defaults/glitter_sprite.dds;
  MinFilter = LINEAR;
  MagFilter = LINEAR;
  MipFilter = LINEAR; 
  AddressU = Clamp;
  AddressV = Clamp;
};

///////////////// vertex shader //////////////////

// Vertex input for glitterSpriteVS.
struct vtxInGlitterSprite
{
  // IN_P is a macro defined elsewhere; the shader reads IN.Position from it.
  IN_P    
  // xy = sprite corner coordinate (0..1); z = squared attenuation distance
  // (glitterSpriteVS takes its square root).
  float3 baseTC : TEXCOORDN;
};

// Vertex-to-pixel data for the glitter sprite pass.
struct vtxOutGlitterSprite
{
  float4 HPosition  : POSITION;   // projected position (zeroed when the sprite is culled)
  float3 baseTC     : TEXCOORDN;  // xy = sprite texture coordinate (v flipped), z = distance attenuation
  float4 glitCol    : TEXCOORDN;  // xyz = glitter intensity, w = N.V term
  float4 screenPos  : TEXCOORDN;  // result of HPosToScreenTC(HPosition)
};

///////////////// vertex shader //////////////////

// Vertex shader for glitter sprites.
// Expands each vertex into a camera-facing quad corner whose size grows with the
// distance to the camera, and computes a per-sprite glitter intensity from a
// position-derived pseudo normal, the view direction and a distance falloff.
// Sprites whose intensity is below 0.01 get a zero output position.
vtxOutGlitterSprite glitterSpriteVS(vtxInGlitterSprite IN)
{
  vtxOutGlitterSprite OUT = (vtxOutGlitterSprite)0;

  float4 vWorldPos = float4(IN.Position.xyz, 1.0);

  // Distance from the camera to the sprite centre.
  float3 vToSprite = (vWorldPos.xyz - g_VS_WorldViewPos.xyz);
  float fCamDist = length(vToSprite);

  // Sprite size scales with distance; 220.0 is a hand-tweaked value.
  float fSpriteSize = (fCamDist/220.0)*clamp(glitterParams.w, 0.0, 2.0);

  // Offset of this corner from the sprite centre, from the 0..1 corner coordinate.
  float2 vCorner = fSpriteSize*2.0*(IN.baseTC.xy - 0.5);

  // Lift the sprite a little so it tends not to intersect the terrain.
  vWorldPos.z += fSpriteSize*1.25;
  vWorldPos.xyz += (camRightVector.xyz*vCorner.x + camUpVector.xyz*vCorner.y);

  OUT.HPosition = mul(vpMatrix, vWorldPos);
  OUT.baseTC.xy = float2(IN.baseTC.x, 1-IN.baseTC.y);

  // Pseudo normal derived from the fractional part of the scaled position.
  float3 vPseudoNormal = normalize(frac(IN.Position.xyz*100.0)*2.0-1.0);

  // Distance falloff: 1 at the camera, 0 at the attenuation distance.
  float fFadeDist = sqrt(IN.baseTC.z);
  float fFade = saturate(1.0-fCamDist/fFadeDist);

  // View dependency.
  float3 vCamDir = normalize(-vpMatrix[2].xyz);
  float fNdotV = dot(vPseudoNormal.xyz, vCamDir.xyz);

  // Glint: a triangle wave over the distance to the camera, modulated by a
  // powered visibility term.
  float fGlintWave = abs(frac((fFadeDist-fCamDist)*0.5*glitterParams.x)*2-1);
  float fGlintFacing = saturate(4*pow(fNdotV*0.5+0.5, glitterParams.y));
  float fGlint = fGlintWave*fGlintFacing;

  // Final term is glint * distance attenuation.
  float fGlitter = fFade*fGlint;
  OUT.glitCol.xyz = fGlitter;
  OUT.glitCol.w = fNdotV;
  OUT.baseTC.z = fFade;

  // Cull sprite (todo: check performance gains, if any at all)
  if(fGlitter < 0.01)
  {
    OUT.HPosition = 0;
  }

  // Output screen space position for alpha masking (taken after the cull above).
  OUT.screenPos = HPosToScreenTC(OUT.HPosition);

  return OUT;
}

///////////////// pixel shader //////////////////

// Used for glitter particles
// Pixel shader for glitter particle sprites.
// rgb = sprite texture colour multiplied by the per-sprite glitter colour, which
// is tinted by a lookup into rainbowSampler; alpha = luminance of that rgb.
// The screen-space alpha mask is not applied, so IN.screenPos is not read here.
pixout glitterSpritePS(vtxOutGlitterSprite IN)
{
  pixout OUT;
  half4 baseColor = tex2D(glitterSpriteSampler, IN.baseTC.xy);

  // Fake chromatic aberration: add a rainbow tint looked up with the N.V term
  // stored in glitCol.w.
  half4 chromAb = IN.glitCol+IN.glitCol*tex2D(rainbowSampler, IN.glitCol.ww);

  half3 final=chromAb.xyz*baseColor.xyz;
  OUT.Color.xyz=final.xyz;

  // Luminance of the final colour goes to alpha.
  half lum= dot(final.xyz, half3(0.33, 0.59, 0.11));
  OUT.Color.w= lum;

  return OUT;
}

// Glittering final pass (used if glitterGlare is on)
// Adds the two glitter maps on top of the scene colour from _tex0.
// Each map contributes alpha * rgb * (1 - scene) * 2; the second contribution
// is computed against the scene colour that already includes the first.
pixout glitteringPassPS(vtxOut IN)
{
  pixout OUT;

  half4 cScene = tex2D(_tex0, IN.baseTC.xy);
  half4 cGlitterA = tex2D(glitterScaledMap_d2, IN.baseTC.xy);
  half4 cGlitterB = tex2D(glitterScaledMap_d4, IN.baseTC.xy);

  // First layer.
  cScene.xyz += cGlitterA.w*cGlitterA.xyz*(1-cScene.xyz)*2.0;
  // Second layer, applied to the updated colour.
  cScene.xyz += cGlitterB.w*cGlitterB.xyz*(1-cScene.xyz)*2.0;

  OUT.Color = cScene;

  return OUT;
}

////////////////// technique /////////////////////

// Single-pass technique drawing the glitter sprites with glitterSpriteVS +
// glitterSpritePS. Blending is ONE/ONE (additive) with depth writes disabled
// and culling disabled.
technique GlitterSprites
{
  pass p0
  {
    VertexShader = CompileVS glitterSpriteVS();
    PixelShader = CompilePS glitterSpritePS();      
    
    CullMode = None;   
    SrcBlend = ONE;
    DestBlend = ONE;
    AlphaBlendEnable = true;
    ZWriteEnable = false;
  }
}

// Single-pass technique: BaseVS + glitteringPassPS, culling disabled.
technique GlitteringPass
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS glitteringPassPS();      
    CullMode = None;        
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Glow technique //////////////////////////////////////////////////////////////////////////

/// Specific data ////////////////////////

/// Constants ////////////////////////////

// Glow parameters: z is the bright-pass threshold used by GlowBrightPassPS,
// w is the glow intensity multiplier used by GlowBrightPassPS and MergeSkinAndGlowPS.
float4 glowParamsPS;

/// Samplers ////////////////////////////

// Glow render targets bound to sampler registers s1..s3.
// NOTE(review): the glow pixel shaders defined below sample _tex1.._tex3 rather than these names.
sampler2D glowMap_RT1 : register(s1);
sampler2D glowMap_RT2 : register(s2);
sampler2D glowMap_RT3 : register(s3);

// Vertex input for GlowGenVS.
struct vtxInGlow
{
  // IN_P and IN_TBASE are macros defined elsewhere; the shader reads
  // IN.Position and IN.baseTC from them.
  IN_P
  IN_TBASE
  float3 CamVec    : TEXCOORD1;  
};

// Vertex-to-pixel data produced by GlowGenVS.
struct vtxOutGlow
{
  float4 HPosition  : POSITION;   // input position transformed by vpMatrix
  float2 baseTC       : TEXCOORD0;  // texture coordinate, passed through
  float3 CamVec       : TEXCOORD1;  // camera vector, passed through
};

// Vertex shader for glow generation: transforms the position by vpMatrix and
// passes the texture coordinate and camera vector through unchanged.
vtxOutGlow GlowGenVS(vtxInGlow IN)
{
  vtxOutGlow OUT = (vtxOutGlow)0;

  // Pass-through attributes.
  OUT.CamVec.xyz = IN.CamVec.xyz;
  OUT.baseTC.xy = IN.baseTC.xy;

  // Position in screen space.
  OUT.HPosition = mul(vpMatrix, IN.Position);

  return OUT;
}

///////////////// pixel shader //////////////////

// Outputs the saturated luminance of the _tex0 colour in rgb and a constant
// 0.02 in alpha.
pixout SceneLuminancePassPS(vtxOut IN)
{
  pixout OUT;

  const float3 vLumWeights = float3(0.33, 0.59, 0.11);

  half4 cScene = tex2D( _tex0, IN.baseTC.xy );
  half fLuminance = saturate( dot(cScene.xyz, vLumWeights) );

  OUT.Color = half4(fLuminance, fLuminance, fLuminance, 0.02);

  return OUT;
}

// Bright pass for glow.
// With threshold t = glowParamsPS.z, the _tex0 colour c becomes
// max(c - t, 0) / (c + t), scaled by (1 - eye adjustment from _tex2) and by
// glowParamsPS.w. The glow colour from _tex1 is then added.
pixout GlowBrightPassPS(vtxOut IN)
{
  pixout OUT;

  half4 cScene = tex2D(_tex0, IN.baseTC.xy);
  half4 cGlow = tex2D(_tex1, IN.baseTC.xy);
  half4 cEyeAdjust = tex2D(_tex2, IN.baseTC.xy);

  // Keep only what exceeds the threshold, normalized by colour + threshold.
  half4 cBright = max(cScene - glowParamsPS.z, 0.0)/(cScene+glowParamsPS.z);

  // Attenuate by eye adaptation and apply the glow intensity.
  cBright *= (1 - cEyeAdjust) * glowParamsPS.w;

  OUT.Color = ( cBright + cGlow );

  return OUT;
}

////////////////////////////////////////////////////////
// Merged shader for screen-space SSS and non-HDR glow.

// Samples screenMap at baseTC (via tex2Dlod, so baseTC.w selects the mip level)
// and returns the rgb raised to the power 2.2.
float3 SubsurfaceSample(sampler2D screenMap, float4 baseTC)
{
	float3 cTexel = tex2Dlod( screenMap, baseTC ).xyz;
	return pow(cTexel, 2.2f);
}

// Merged pass for screen-space subsurface scattering and non-HDR glow.
//
// Glow: the colours of _tex1, _tex2 and _tex3 are summed, scaled by
// glowParamsPS.w and added to the output rgb at the very end.
//
// Skin: the squared alpha of _tex1 is the blur strength. When it is non-zero the
// scene colour from _tex0 is converted with pow 2.2 and blurred in three
// successive layers; each layer is the previous layer plus seven poisson taps,
// multiplied by 1/8. The four layers are combined with per-channel weights and
// converted back with pow 1/2.2.
pixout MergeSkinAndGlowPS(vtxOut IN)
{
  pixout OUT = (pixout)0;

  // Scene colour is the default output.
  half4 cScene = tex2D(_tex0, IN.baseTC.xy);
  OUT.Color = cScene;

  //------------------------------------------------------------------------------------
  // Glow (holograms, etc.)
  //------------------------------------------------------------------------------------
  half4 cGlowA = tex2D(_tex1, IN.baseTC.xy);
  half4 cGlowB = tex2D(_tex2, IN.baseTC.xy);
  half4 cGlowC = tex2D(_tex3, IN.baseTC.xy);

  half4 cGlowSum = (cGlowA + cGlowB + cGlowC) * glowParamsPS.w;

  //------------------------------------------------------------------------------------
  // Screen-space SSS, 3 layer poisson (21-tap) approximation.
  //------------------------------------------------------------------------------------
  const int nTaps = 7;
  const float2 vTapOffsets[7] =
  {
     float2(0.527, -0.085), float2(-0.040,  0.536), float2(-0.670, -0.179), float2(-0.419, -0.616),
     float2(0.440, -0.639), float2(-0.757,  0.349), float2(0.574,  0.685),
  };

  // Per-channel weight of each layer.
  const float3 cLayerWeights[4] =
  {
     float3(0.333, 0.791, 0.993), float3(0.231, 0.205, 0.007),
     float3(0.385, 0.004, 0.0), float3(0.078, 0.0, 0.0)
  };

  // Blur strength comes from the alpha of the first glow texture.
  float fSkinMask = (cGlowA.a * cGlowA.a);

  // Only calculate SSS if the mask is positive (no skin, high-frequency, etc).
  if(fSkinMask)
  {
    float2 vKernel = 16.0f * fSkinMask * PS_ScreenSize.zw;
    int t;

    // First layer: the unblurred scene colour.
    float3 cLayer0 = pow(cScene, 2.2f);

    // Second layer.
    float3 cLayer1 = cLayer0;
#if D3D10
    [unroll]
#endif
    for(t = 0; t < nTaps; t++)
    {
      cLayer1 += SubsurfaceSample(_tex0, float4( IN.baseTC.xy + vTapOffsets[t] * vKernel, 0.0, 0.0));
    }
    cLayer1 *= 0.125f;

    // Scale kernel for next layer (doubles the size and flips the vertical axis).
    vKernel *= float2(2.0, -2.0);

    // Third layer.
    float3 cLayer2 = cLayer1;
#if D3D10
    [unroll]
#endif
    for(t = 0; t < nTaps; t++)
    {
      cLayer2 += SubsurfaceSample(_tex0, float4( IN.baseTC.xy + vTapOffsets[t] * vKernel, 0.0, 0.0));
    }
    cLayer2 *= 0.125f;

    // Scale kernel for next layer (four times the size, both axes flipped).
    vKernel *= float2(-4.0, -4.0);

    // Fourth layer.
    float3 cLayer3 = cLayer2;
#if D3D10
    [unroll]
#endif
    for(t = 0; t < nTaps; t++)
    {
      cLayer3 += SubsurfaceSample(_tex0, float4( IN.baseTC.xy + vTapOffsets[t] * vKernel, 0.0, 0.0));
    }
    cLayer3 *= 0.125f;

    // Sum each layer with respective weights.
    float3 cDiffused = cLayer0 * cLayerWeights[0];
    cDiffused += cLayer1 * cLayerWeights[1];
    cDiffused += cLayer2 * cLayerWeights[2];
    cDiffused += cLayer3 * cLayerWeights[3];

    // Back to gamma space.
    OUT.Color.rgb = pow(cDiffused, 1.0f/2.2f);
  }

  //------------------------------------------------------------------------------------
  // Glow output (holograms, etc.)
  //------------------------------------------------------------------------------------

  // Apply glow at the very end.
  OUT.Color.rgb += cGlowSum.rgb;

  return OUT;
}

////////////////// technique /////////////////////

// Technique SceneLuminancePass: a single pass (p0) that pairs BaseVS with
// SceneLuminancePassPS and sets CullMode to None.
technique SceneLuminancePass
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS SceneLuminancePassPS();    
    CullMode = None;        
  }
}

// Technique GlowBrightPass: a single pass (p0) that pairs BaseVS with
// GlowBrightPassPS and sets CullMode to None.
technique GlowBrightPass
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS GlowBrightPassPS();    
    CullMode = None;        
  }
}

// Technique MergeSkinAndGlow: a single pass (p0) that pairs BaseVS with
// MergeSkinAndGlowPS and sets CullMode to None.
technique MergeSkinAndGlow
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS MergeSkinAndGlowPS();    
    CullMode = None;        
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// GlowScene: copies glow into backbuffer /////////////////////////////////////////////////////////

/// Specific data ////////////////////////

/// Constants ////////////////////////////

////////////////// samplers /////////////////////

/////////////////////////////////////////////////
// Pixel shader: writes the RGB of _tex0 (sampled at the interpolated texcoord)
// to the output colour and forces the output alpha to 1.
pixout GlowScenePS(vtxOut IN)
{
  pixout OUT;

  // Only the colour channels of the source texel are kept.
  half4 cSource = tex2D(_tex0, IN.baseTC.xy);

  OUT.Color = float4(cSource.r, cSource.g, cSource.b, 1.0);

  return OUT;
}

////////////////// technique /////////////////////
// Technique GlowScene: a single pass (p0) that pairs BaseVS with GlowScenePS
// and sets CullMode to None.
technique GlowScene
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();            
    PixelShader = CompilePS GlowScenePS();
    CullMode = None;        
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// EncodeHDRGlow: encodes HDR glow into LDR glow texture //////////////////////////////////////////

/// Specific data ////////////////////////

/// Constants ////////////////////////////

////////////////// samplers /////////////////////

// Pixel shader: samples _tex0 at the interpolated texcoord and outputs the
// result of passing that sample through EncodeRGBS (defined elsewhere).
pixout EncodeHDRtoLDRPS(vtxOut IN)
{
  pixout OUT;

  float4 cSource = tex2D( _tex0, IN.baseTC.xy );
  OUT.Color = EncodeRGBS( cSource );

  return OUT;
}

////////////////// technique /////////////////////
// Technique EncodeHDRtoLDR: a single pass (p0) that pairs BaseVS with
// EncodeHDRtoLDRPS and sets CullMode to None.
technique EncodeHDRtoLDR
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();            
    PixelShader = CompilePS EncodeHDRtoLDRPS();
    CullMode = None;        
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// SunShafts technique //////////////////////////////////////////////////////////////////////////

/// Specific data ////////////////////////

/// Constants ////////////////////////////

// Sun-shafts constants (how they are read by the shaders in this section):
//  PI_sunShaftsParams.x scales the per-sample step toward the sun, .y scales the
//  distance falloff (both in SunShaftsGenPS).
float4 PI_sunShaftsParams;
//  sunShaftsParams.x scales the shafts mask, .y scales the shafts colour (both in
//  SunShaftsDisplayPS).
float4 sunShaftsParams;
//  Matrix applied to SunShafts_SunPos in SunShaftsGenVS; its row [2] is also dotted
//  with the normalized sun position there.
float4x4 SunShafts_ViewProj;
//  Sun position fed through SunShafts_ViewProj in SunShaftsGenVS.
float4 SunShafts_SunPos;

// Vertex-to-pixel interpolants produced by SunShaftsGenVS and consumed by SunShaftsGenPS.
struct vtxOutSunShaftsGen
{
  float4 HPosition  : POSITION; 
  float2 baseTC       : TEXCOORD0;  // texcoord copied from the input vertex
  // xy: sun position terms, divided by z in the pixel shader; z: the transformed
  // sun position's w; w: dot product used as "fSign" in the pixel shader.
  float4 sunPos       : TEXCOORD1;  
};

///////////////// vertex shader //////////////////

// Vertex shader for the sun-shafts generation pass.
// Outputs the vertex transformed by vpMatrix, the unmodified texcoord, and in
// sunPos the sun position terms the pixel shader needs:
//   xy = ((x + w) * 0.5, (-y + w) * 0.5) of the sun position after SunShafts_ViewProj,
//   z  = that position's w (the pixel shader divides xy by it),
//   w  = dot(normalize(SunShafts_SunPos).xyz, SunShafts_ViewProj[2].xyz).
vtxOutSunShaftsGen SunShaftsGenVS(vtxIn IN)
{
  vtxOutSunShaftsGen OUT = (vtxOutSunShaftsGen)0; 

  // Screen quad vertex and texcoord pass-through.
  OUT.HPosition = mul(vpMatrix, IN.Position);
  OUT.baseTC.xy = IN.baseTC.xy;

  // Sun position after the sun-shafts view-projection transform.
  float4 vSunH = mul(SunShafts_ViewProj, SunShafts_SunPos);

  OUT.sunPos.xy = float2(vSunH.x + vSunH.w, -vSunH.y + vSunH.w) * 0.5;
  OUT.sunPos.z = vSunH.w;

  // NOTE(review): the normalize runs over all four components of SunShafts_SunPos
  // before .xyz is taken - kept exactly as in the original.
  float3 vSunDir = normalize(SunShafts_SunPos).xyz;
  OUT.sunPos.w = dot(vSunDir, SunShafts_ViewProj[2].xyz);

  return OUT;
}

///////////////// pixel shader //////////////////

// Pixel shader: builds the sun-shafts mask.
// Reads .r of _tex0 as "scene depth" and _tex1 as scene colour. At QUALITY_HIGH both
// are averaged over the five texcoords supplied by TexToTexVS; otherwise only the
// centre texcoord is used.
// Output: rgb = scene colour * saturate(depth), a = 1 - depth.
pixout SunShaftsMaskGenPS(vtxOutTexToTex IN)
{
  pixout OUT;

  half4 cSceneAvg = 0;
  half fDepthAvg = 0;

  if( GetShaderQuality() == QUALITY_HIGH )
  {
    // Five-tap average of the depth channel.
    half d0 = tex2D(_tex0, IN.baseTC0.xy).r;
    half d1 = tex2D(_tex0, IN.baseTC1.xy).r;
    half d2 = tex2D(_tex0, IN.baseTC2.xy).r;
    half d3 = tex2D(_tex0, IN.baseTC3.xy).r;
    half d4 = tex2D(_tex0, IN.baseTC4.xy).r;
    fDepthAvg = (d0 + d1 + d2 + d3 + d4) * 0.2;

    // Five-tap average of the scene colour.
    half4 c0 = tex2D(_tex1, IN.baseTC0.xy);
    half4 c1 = tex2D(_tex1, IN.baseTC1.xy);
    half4 c2 = tex2D(_tex1, IN.baseTC2.xy);
    half4 c3 = tex2D(_tex1, IN.baseTC3.xy);
    half4 c4 = tex2D(_tex1, IN.baseTC4.xy);
    cSceneAvg = (c0 + c1 + c2 + c3 + c4) * 0.2;
  }
  else
  {
    // Single centre tap.
    fDepthAvg = tex2D(_tex0, IN.baseTC0.xy).r;
    cSceneAvg = tex2D(_tex1, IN.baseTC0.xy);
  }

  half fShaftsMask = (1 - fDepthAvg);

  OUT.Color = half4( cSceneAvg.xyz * saturate(fDepthAvg), fShaftsMask );

  return OUT;
}

// Pixel shader: accumulates 8 samples of _tex0 along the line from this pixel
// toward the projected sun position.
//  - IN.sunPos.xy / IN.sunPos.z is the projected sun position; IN.sunPos.w ("fSign")
//    gates and scales the effect.
//  - PI_sunShaftsParams.x scales the step between samples, .y scales the distance falloff.
//  - Sample i is weighted by (1 - i/numSamples); the sum is divided by numSamples and
//    doubled. rgb is additionally scaled by the falloff term sunDist, and alpha gets
//    1 - saturate(fSign*0.1 + 0.9) added.
pixout SunShaftsGenPS(vtxOutSunShaftsGen IN)
{
  pixout OUT;
  
  float2 sunPosProj = IN.sunPos.xy / IN.sunPos.z;
  
  float fSign = IN.sunPos.w;
  
  // Vector from this pixel to the sun; used both for the falloff and as march direction
  // (the original computed the same difference twice).
  float2 sunVec = sunPosProj.xy - IN.baseTC.xy;
  
  float fAspectRatio =  1.333 * PS_ScreenSize.y /PS_ScreenSize.x;
  
  // 1 - saturate(x) already lies in [0,1], so no outer saturate is needed.
  float sunDist = saturate(fSign) * ( 1 - saturate(length(sunVec * float2(1, fAspectRatio))*PI_sunShaftsParams.y) );

  float2 sunDir = sunVec * (PI_sunShaftsParams.x * fSign);
  
  half4 accum = 0; 
  
  const float numSamples = 8;
  
#if D3D10
  [unroll]
#endif
  for(int i=0; i<numSamples; i++)
  {
    half4 depth = tex2D(_tex0, (IN.baseTC.xy + sunDir.xy * i) );      
    accum += depth * (1.0-i/numSamples);
  }
  
  accum /= numSamples;
  
  OUT.Color = accum * 2  * float4(sunDist.xxx, 1);
  // saturate is idempotent, a single call is sufficient.
  OUT.Color.w += 1-saturate(fSign*0.1+0.9);
    
  return OUT;
}

// Pixel shader: composites the sun shafts (_tex1) over the screen (_tex0).
//  - sunShaftsParams.x scales the shafts mask derived from the shafts alpha,
//    sunShaftsParams.y scales the shafts colour contribution.
//  - The shafts are added as cScreen + shafts * sunColor * (1 - cScreen), then the
//    result is passed through BlendSoftLight with a sun-coloured mask.
pixout SunShaftsDisplayPS(vtxOut IN)
{
  pixout OUT;

  // Gamma correct input colors.
  // NOTE(review): this passes the global g_PS_SunColor to a helper defined elsewhere;
  // whether it modifies the global in place cannot be seen from here.
  HDRGammaCorrectInputColor(g_PS_SunColor);

  half4 cScreen = tex2D(_tex0, IN.baseTC.xy);      
  half4 cSunShafts = tex2D(_tex1, IN.baseTC.xy);

  // Mask is the inverted shafts alpha; the 1.00001 bias keeps it just above zero.
  half fShaftsMask = saturate(1.00001- cSunShafts.w) *sunShaftsParams.x * 2.0;
      
  // normalize sun color (dont wanna huge values in here)
  // NOTE(review): normalize of an all-zero sun colour is undefined - confirm the
  // engine never supplies one.
  float4 sunColor = 1;
  sunColor.xyz = normalize(g_PS_SunColor.xyz);
  
  OUT.Color =  cScreen + cSunShafts.xyzz * sunShaftsParams.y * sunColor * ( 1 - cScreen );
  OUT.Color = BlendSoftLight(OUT.Color, sunColor * fShaftsMask *0.5+0.5);
      
  return OUT;
}

////////////////// technique /////////////////////

// Technique SunShaftsMaskGen: a single pass (p0) that pairs TexToTexVS (which
// supplies the five offset texcoords) with SunShaftsMaskGenPS; CullMode is None.
technique SunShaftsMaskGen
{
  pass p0
  {
    VertexShader = CompileVS TexToTexVS();
    PixelShader = CompilePS SunShaftsMaskGenPS();    
    CullMode = None;        
  }
}

// Technique SunShaftsGen: a single pass (p0) that pairs SunShaftsGenVS with
// SunShaftsGenPS; CullMode is None.
technique SunShaftsGen
{
  pass p0
  {
    VertexShader = CompileVS SunShaftsGenVS();
    PixelShader = CompilePS SunShaftsGenPS();    
    CullMode = None;        
  }
}

// Technique SunShaftsDisplay: a single pass (p0) that pairs BaseVS with
// SunShaftsDisplayPS; CullMode is None.
technique SunShaftsDisplay
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS SunShaftsDisplayPS();    
    CullMode = None;        
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Depth Enhancement technique ////////////////////////////////////////////////////////////////////

/// Constants ////////////////////////////

// Depth-enhancement parameters. NOTE(review): DepthEnhancementPS in this section does
// not read this constant; it may be set by the engine for other code.
float4 vDepthEnhancementParams;

///////////////// pixel shader //////////////////

// Pixel shader: darkens the screen colour (_tex0) where the depth (_tex1, read through
// GetDepthMapScaled) differs from its neighbourhood.
//  - Eight neighbours are sampled at 2 * vSamples[i] / ScrSize.xy around the pixel.
//  - Each (centre - neighbour) difference is clamped to [-1, 1]; the eight are averaged.
//  - Output = saturate(1 - |average|) * screen colour.
pixout DepthEnhancementPS(vtxOut IN)
{
  pixout OUT;
      
  half4 cScreen = tex2D(_tex0, IN.baseTC.xy );      
  
  // Tap directions: four axis-aligned, then four rotated offsets.
  float2 vSamples[8] =
  {
    -float2(0, 1),
    -float2(1, 0),
    float2(0, 1),
    float2(1, 0),
    
    float2(0.5, 0.85),
    float2(0.85, 0.5),
    -float2(0.5, 0.85),
    -float2(0.85, 0.5),
  };
  
  float fDepth = GetDepthMapScaled(_tex1, IN.baseTC.xy );  
  
  // Accumulate the clamped depth differences in the same order as the taps are listed.
  float fDepthBlur = 0;
#if D3D10
  [unroll]
#endif
  for(int i=0; i<8; i++)
  {
    float fNeighbour = GetDepthMapScaled(_tex1, IN.baseTC.xy + 2 * vSamples[i] / ScrSize.xy);
    fDepthBlur += max( min( ( fDepth - fNeighbour ), 1.0) , -1.0) ;
  }
  
  fDepthBlur /= 8.0;
  
  // The original also scaled fDepth by PS_NearFarClipDist.y at this point, but the
  // value was never read afterwards, so that dead store has been dropped.
    
  OUT.Color = saturate(1- abs(fDepthBlur) )* cScreen;
      
  return OUT;
}

////////////////// technique /////////////////////

// Technique DepthEnhancement: a single pass (p0) that pairs BaseVS with
// DepthEnhancementPS; CullMode is None.
technique DepthEnhancement
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS DepthEnhancementPS();    
    CullMode = None;        
  }
}


////////////////////////////////////////////////////////////////////////////////////////////////////
/// Chroma Shift technique /////////////////////////////////////////////////////////////////////////

/// Constants ////////////////////////////

///////////////// pixel shader //////////////////

// Pixel shader: per-channel radial shift of _tex0.
// Each of R, G and B is fetched at the texcoord scaled about the screen centre (0.5)
// by (1 - psParams[0].x / .y / .z) respectively. Output alpha stays 0.
pixout ChromaShiftPS(vtxOut IN)
{
  pixout OUT;

  // Texcoord relative to the centre of the screen.
  float2 vFromCentre = IN.baseTC.xy-0.5;

  half4 cShifted = 0;
  cShifted.x = tex2D(_tex0, vFromCentre * (1.0 - psParams[0].x) + 0.5).x;
  cShifted.y = tex2D(_tex0, vFromCentre * (1.0 - psParams[0].y) + 0.5).y;
  cShifted.z = tex2D(_tex0, vFromCentre * (1.0 - psParams[0].z) + 0.5).z;

  OUT.Color = cShifted;

  return OUT;
}

////////////////// technique /////////////////////

// Technique ChromaShift: a single pass (p0) that pairs BaseVS with ChromaShiftPS;
// CullMode is None.
technique ChromaShift
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS ChromaShiftPS();    
    CullMode = None;        
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// UnderwaterView technique /////////////////////////////////////////////////////////////////////////

/// Constants ////////////////////////////

// Noise bump texture used to perturb screen texcoords in UnderwaterViewPS and
// UnderwaterGodRaysFinalPS. Linear filtering on min/mag/mip, wrapping in U and V.
sampler2D underwaterBumpSampler = sampler_state
{
  Texture = textures/defaults/screen_noisy_bump.dds;
  MinFilter = LINEAR;  
  MagFilter = LINEAR;
  MipFilter = LINEAR; 
  AddressU = Wrap;
  AddressV = Wrap;
};

///////////////// pixel shader //////////////////

// Pixel shader: underwater screen distortion.
// Two animated lookups into underwaterBumpSampler are summed and normalized; the
// resulting xy (scaled by 0.01) offsets the texcoord used to sample screenMapSampler.
pixout UnderwaterViewPS(vtxOut IN)
{
  pixout OUT;
    
#if D3D10	
  // temporary workaround for d3d10 hlsl compiler bug
  OUT.Color = tex2D(screenMapSampler, IN.baseTC.xy);
#endif

  // Animation phase in [0,1), added to / subtracted from the two bump lookups.
  float anim = frac(AnimGenParams*0.01);  
  half4 cBumpy = tex2D(underwaterBumpSampler, IN.baseTC.xy*0.025 + anim )*2-1;
  cBumpy += tex2D(underwaterBumpSampler, IN.baseTC.yx*0.033 - anim )*2-1;
  // NOTE(review): the normalize runs over all four components before .xyz is taken;
  // kept as in the original because changing it would alter the distortion strength.
  cBumpy.xyz = normalize( cBumpy ).xyz;
      
  half4 cScreen = tex2D(screenMapSampler, IN.baseTC.xy + cBumpy.xy*0.01);
  
  OUT.Color = cScreen;

  return OUT;
}

////////////////// technique /////////////////////

// Technique UnderwaterView: a single pass (p0) that pairs BaseVS with
// UnderwaterViewPS; CullMode is None.
technique UnderwaterView
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS UnderwaterViewPS();    
    CullMode = None;        
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// UnderwaterGodRays technique /////////////////////////////////////////////////////////////////////////

/// Constants ////////////////////////////

// Matrices shared by UnderwaterGodRaysVS and VolumetricScatteringVS: the slice position
// is multiplied by vpGodMatrixI, divided by w, then multiplied by vpGodMatrix.
float4x4 vpGodMatrix  : PI_Composite; // View*Projection
float4x4 vpGodMatrixI : PB_UnProjMatrix; // invert( View * projection )

float4 CausticsAmbient  : PI_Ambient;
// .y is read as an intensity factor by UnderwaterGodRaysPS and VolumetricScatteringPS.
float4 CausticParams	  : PB_CausticsParams;  // xy: caustics distance, zw: 1 / caustics distance


// Per-slice parameters: the shaders below read z * w as the slice position (VS and PS)
// and PS .y / .w as intensity factors.
float4 PI_GodRaysParamsVS;
float4 PI_GodRaysParamsPS;
float4 PI_GodRaysSunDirVS;
// Used as the Z axis of the light-view basis built in UnderwaterGodRaysVS.
float4 CausticSmoothSunDir	: PB_CausticsSmoothSunDirection; 


// Ocean-wave normal map, fetched through FetchNormalMap in UnderwaterGodRaysPS.
// Linear filtering on min/mag/mip, wrapping in U and V.
sampler2D wavesSampler = sampler_state
{
  Texture = textures/defaults/oceanwaves_ddn.dds;
  MinFilter = LINEAR;
  MagFilter = LINEAR;
  MipFilter = LINEAR;
  AddressU = Wrap;
  AddressV = Wrap;	
};

// Caustics lookup texture sampled three times in UnderwaterGodRaysPS.
// Linear min/mag filtering, no mip filtering, clamped in U and V.
sampler2D causticsSampler = sampler_state
{
  Texture = textures/defaults/caustics_sampler.dds;
  MinFilter = LINEAR;
  MagFilter = LINEAR;
  MipFilter = NONE;
  AddressU = Clamp;
  AddressV = Clamp;	
};

// Water level constant bound to PB_WaterLevel; DistantRainPS compares a position
// value against it for its water intersection terms.
float g_fWaterLevel
<
  Position;
> = {PB_WaterLevel};

// Vertex-to-pixel interpolants produced by UnderwaterGodRaysVS.
struct vtxOutGodRays
{
  float4 HPosition  : POSITION; 
  float4 baseTC    : TEXCOORDN; // zw unused
  
  // Each of the three below packs two texcoord pairs: one in .xy and one in .wz.
  float4 waveTC      : TEXCOORDN;
  float4 causticTC0  : TEXCOORDN;
  float4 causticTC1  : TEXCOORDN;
  
  float4 vPosition : TEXCOORDN;  // w unused   
};

///////////////// vertex shader //////////////////

// Vertex shader for one underwater god-rays slice.
//  1. Maps the quad vertex from [0,1] to [-1,1], enlarges it by 1.2 and places it at a
//     depth of 0.1 + 0.88 * saturate(PI_GodRaysParamsVS.z * PI_GodRaysParamsVS.w).
//  2. Multiplies by vpGodMatrixI, divides by w, then multiplies by vpGodMatrix to get
//     the output position.
//  3. Builds a basis around CausticSmoothSunDir and uses the position in that basis to
//     generate the animated wave / caustic texcoords (pairs packed in .xy and .wz).
vtxOutGodRays UnderwaterGodRaysVS(vtxIn IN)
{
  vtxOutGodRays OUT = (vtxOutGodRays)0; 

  // Slice position in screen space.
  float4 vSlice = IN.Position;
  vSlice.xy = (vSlice.xy *2 - 1);
  vSlice.xy *= 1.2; // hack: make sure to cover entire screen

  // Increase each slice distance
  vSlice.z = 0.1+ 0.88 * saturate(PI_GodRaysParamsVS.z * PI_GodRaysParamsVS.w);
  vSlice.w = 1;

  // Project back to world space
  float4 vWorld = mul(vpGodMatrixI, vSlice );
  vWorld /= vWorld.w;

  OUT.HPosition = mul(vpGodMatrix, vWorld);  

  // Texcoord pass-through with V flipped.
  OUT.baseTC.x = IN.baseTC.x;
  OUT.baseTC.y = 1 - IN.baseTC.y;

  OUT.vPosition.xyz = vWorld.xyz;
  OUT.vPosition.w = 1;

  // Generate projection matrix based on sun direction  
  float3 vAxisZ = CausticSmoothSunDir.xyz;
  float3 vAxisX = normalize(cross(float3(0,0,1), vAxisZ));
  float3 vAxisY = normalize(cross(vAxisZ, vAxisX));
  float3x3 mLightView = float3x3(vAxisX, vAxisY, vAxisZ);

  // Output caustics procedural texture generation 
  float2 uv = mul(mLightView, OUT.vPosition.xyz).xy*0.5;
  float fAnimTime = g_VS_AnimGenParams.w;

  // half tilling used to avoid annoying aliasing when swimming fast
  OUT.waveTC.xy =  uv * 2 * 0.01 * 0.012 + fAnimTime * 0.06;
  OUT.waveTC.wz =  uv * 2 * 0.01 * 0.01 + fAnimTime * 0.05;

  OUT.causticTC0.xy =  uv * 0.01 * 0.5 *2+ fAnimTime * 0.1;
  OUT.causticTC0.wz =  uv.yx * 0.01 * 0.5 *2- fAnimTime * 0.11;  

  OUT.causticTC1.xy =  uv * 0.01 * 2.0 *2+ fAnimTime * 0.1;
  OUT.causticTC1.wz =  uv.yx * 0.01 * 2.0 *2- fAnimTime * 0.11;  

  return OUT;
}

///////////////// pixel shader //////////////////

// Pixel shader for one underwater god-rays slice.
// Perturbs the caustic texcoords with two wave-map lookups, combines low- and
// high-frequency normal lookups into a 2D offset, and uses that offset for three
// slightly different lookups into causticsSampler (one per colour channel).
// The result is sharpened and scaled by PI_GodRaysParamsPS.w / .y, saturate(CausticParams.y),
// 0.25 and the slice position (PI_GodRaysParamsPS.z * .w).
// Output alpha is the alpha of screenMapSampler at the slice texcoord.
pixout UnderwaterGodRaysPS(vtxOutGodRays IN)
{
  pixout OUT;
    
  // Only the .w of this sample survives; .xyz is overwritten further down.
  half4 cScreen =  tex2D(screenMapSampler, IN.baseTC.xy);
      
  // break movement, with random patterns
  float3 wave = 0;
  wave.xy = FetchNormalMap( wavesSampler, IN.waveTC.xy).xy;                                                  // 1 tex
  wave.xy += FetchNormalMap( wavesSampler, IN.waveTC.wz).xy;                                                 // 1 tex, 1 alu

  // Normalization optimization:
  //  - Instead of using GetNormalMap everywhere, which costs 3 alu per lookup, merge both
  //  bumps together, do single normalize after  
  
  // fast normalize
  wave.xy = wave.xy - 1.0;                                                                          // 1 alu
  // NOTE(review): wave.z is only scaled below and never read afterwards.
  wave.z = sqrt(1.0 - dot(wave.xy, wave.xy));                                                       // 2 alu    

  wave *= 0.02;                                                                                     // 1 alu  

  half3 causticMapR = 0;
  causticMapR.xy = FetchNormalMap( wavesSampler, IN.causticTC0.xy + wave.xy).xy;     // 1 tex + 2 alu
  causticMapR.xy += FetchNormalMap(wavesSampler, IN.causticTC0.wz + wave.xy).xy;     // 1 tex + 3 alu
   
  // fast normalize  
  causticMapR.xy = causticMapR.xy - 1.0;                                                            // 1 alu
  // NOTE(review): causticMapR.z is written here but only .xy is read afterwards.
  causticMapR.z = sqrt(1.0 - dot(causticMapR.xy, causticMapR.xy));                                  // 2 alu    
  
  half2 causticHighFreq = 0;
  causticHighFreq = FetchNormalMap( wavesSampler, IN.causticTC1.xy + wave.xy ).xy;   // 1 tex  + 1 alu
  causticHighFreq += FetchNormalMap( wavesSampler, IN.causticTC1.wz + wave.xy ).xy;   // 1 tex  + 2 alu
  causticHighFreq = causticHighFreq * 2.0 - 2.0;                                                    // 1 alu

  causticMapR.xy += causticHighFreq;  

  // Caustics sampler contains function: abs( 1-(abs( a) + abs(b))*0.5 ), which generates nice sharp pattern  
  // The three lookups use slightly different scale/bias so R, G and B separate a little.
  half3 cCaustic;
  cCaustic.x = tex2D(causticsSampler, causticMapR.xy*0.55+0.55).x;
  cCaustic.y = tex2D(causticsSampler, causticMapR.xy*0.525+0.525).x;
  cCaustic.z = tex2D(causticsSampler, causticMapR.xy*0.5+0.5).x;
  
  float slice_pos = PI_GodRaysParamsPS.z * PI_GodRaysParamsPS.w;    
  
  // sharpen up a bit
  cCaustic *= cCaustic;
  
  // add very sharp highlight
  // NOTE(review): cMaxHightVis is only referenced by the commented-out expression below.
  const half cMaxHightVis = 10.0;
  half fHighlightAtten =  1;//cMaxHightVis / (CausticParams.x - IN.vPosition.z);                         // 2 alu    
  // With the constant 1 above this evaluates to saturate(1) * min(1, 2) = 1.
  fHighlightAtten = saturate( fHighlightAtten ) * min( abs( fHighlightAtten ), 2);  
  
  half fAtten =1;// saturate( (CausticParams.x - IN.vPosition.z)*4 );                                          // 2 alu  
  
  cCaustic += pow( cCaustic, 8 );
  
  //half4 cScreen =  tex2D(screenMapSampler, IN.baseTC.xy);
  cScreen.xyz = cCaustic * PI_GodRaysParamsPS.w  * PI_GodRaysParamsPS.y * saturate( CausticParams.y  )* 0.25;
  
  
  // NOTE(review): fDistToCam is computed but never used.
  half fDistToCam = length( WorldViewPos.xyz - IN.vPosition.xyz );                                      // 2 alu
  
  // 4 alu
    
  // fAtten starts at 1, so after this line it equals the slice position.
  fAtten *= ( slice_pos );
  
  cScreen.xyz *= fAtten *fHighlightAtten;
  
  OUT.Color = cScreen;

  return OUT;
}

// Pixel shader: final underwater god-rays composite.
// Builds the same animated bump offset as UnderwaterViewPS, samples the screen with the
// offset scaled by 0.0125 and the quarter-size buffer (screenMapScaledSampler_d4, which
// holds the accumulated caustics) with the offset scaled by 0.01, and adds the two.
pixout UnderwaterGodRaysFinalPS(vtxOut IN)
{
  pixout OUT;

  // Animation phase in [0,1), added to / subtracted from the two bump lookups.
  float anim = frac(AnimGenParams*0.01);  
  half4 cBumpy = tex2D(underwaterBumpSampler, IN.baseTC.xy*0.025 + anim )*2-1;
  cBumpy += tex2D(underwaterBumpSampler, IN.baseTC.yx*0.033 - anim )*2-1;
  // NOTE(review): the normalize runs over all four components before .xyz is taken;
  // kept as in the original.
  cBumpy.xyz = normalize( cBumpy ).xyz;
    
  half4 cScreen = tex2D(screenMapSampler, IN.baseTC.xy + cBumpy.xy*0.0125); 
  half4 cCaustics = tex2D(screenMapScaledSampler_d4, IN.baseTC.xy + cBumpy.xy*0.01);
          
  OUT.Color = cScreen + cCaustics;

  return OUT;
}
 
////////////////// technique /////////////////////

// Technique UnderwaterGodRays: a single pass (p0) that pairs UnderwaterGodRaysVS with
// UnderwaterGodRaysPS; CullMode is None.
technique UnderwaterGodRays
{
  pass p0
  {
    VertexShader = CompileVS UnderwaterGodRaysVS();
    PixelShader = CompilePS UnderwaterGodRaysPS();    
    CullMode = None;        
  }
}

// Technique UnderwaterGodRaysFinal: a single pass (p0) that pairs BaseVS with
// UnderwaterGodRaysFinalPS; CullMode is None.
technique UnderwaterGodRaysFinal
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS UnderwaterGodRaysFinalPS();    
    CullMode = None;        
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Volumetric scattering technique ////////////////////////////////////////////////////////////////

/// Constants ////////////////////////////

// Volumetric scattering constants.
// VS: z * w is used as the slice position in VolumetricScatteringVS.
float4 PI_volScatterParamsVS;
// PS: .w and .y are intensity factors in VolumetricScatteringPS.
float4 PI_volScatterParamsPS;
float4 VolumetricScattering;  // x: tilling, y: speed
// Colour multiplied into the final scattering value in VolumetricScatteringPS.
float4 VolumetricScatteringColor; 

// 3D noise texture; its .w channel is read by VolumetricScatteringPS and DistantRainPS.
// Linear filtering on min/mag/mip, wrapping on all three axes.
sampler3D volumeMapSampler = sampler_state
{  
  Texture = textures/defaults/Noise3D.dds;
  MinFilter = LINEAR;
  MagFilter = LINEAR;
  MipFilter = LINEAR; 
  AddressU = Wrap;
  AddressV = Wrap;
  AddressW = Wrap;
};


// Vertex-to-pixel interpolants produced by VolumetricScatteringVS.
// vPosition0 / vPosition1 carry the two animated 3D noise coordinates.
struct vtxOutVolumetricScattering
{
  float4 HPosition  : POSITION; 
  float4 baseTC    : TEXCOORDN; // zw unused
  
  float4 vPosition0 : TEXCOORDN;  // w unused   
  float4 vPosition1 : TEXCOORDN;  // w unused   
};

///////////////// vertex shader //////////////////

// Vertex shader for one volumetric-scattering slice.
//  1. Offsets the quad vertex by half of g_VS_ScreenSize.zw, maps it to [-1,1] and places
//     it at depth 0.5 + 0.5 * saturate(PI_volScatterParamsVS.z * PI_volScatterParamsVS.w).
//  2. Multiplies by vpGodMatrixI, divides by w, then multiplies by vpGodMatrix to get
//     the output position.
//  3. Emits two animated 3D noise coordinates: the position scaled by
//     VolumetricScattering.x (tiling), moved by the animation time scaled by
//     VolumetricScattering.y (speed).
vtxOutVolumetricScattering VolumetricScatteringVS(vtxIn IN)
{
  vtxOutVolumetricScattering OUT = (vtxOutVolumetricScattering)0; 

  // Position in screen space.
  float4 vPos = IN.Position;
  vPos.xy = (vPos.xy + g_VS_ScreenSize.zw * 0.5 )*2 - 1 ; 
  
  // Increase each slice distance
  vPos.z = 0.5 + 0.5*saturate(PI_volScatterParamsVS.z * PI_volScatterParamsVS.w);
  vPos.w = 1;
  
  // Project back to world space
  vPos = mul(vpGodMatrixI, vPos );
  vPos /= vPos.w;
 
  OUT.HPosition = mul(vpGodMatrix, vPos);  
  
  OUT.baseTC.xy = IN.baseTC.xy;
  OUT.baseTC.y =  1 - OUT.baseTC.y;
  
  vPos *= VolumetricScattering.x;

  // Scaled animation time kept in a local: the original multiplied the global constant
  // g_VS_AnimGenParams.w in place, which non-legacy HLSL compilers reject because
  // global variables are constant by default.
  float fAnimTime = g_VS_AnimGenParams.w * VolumetricScattering.y;
  
  // Explicit .xyz replaces the implicit float4 -> float3 truncation of the original.
  OUT.vPosition0.xyz = vPos.xyz*0.1 + fAnimTime *0.2;
  OUT.vPosition1.xyz = vPos.xyz*0.11 - fAnimTime *0.3;
    
  return OUT;
}

///////////////// pixel shader //////////////////

// Pixel shader for one volumetric-scattering slice.
// Reads the .w channel of the 3D noise at two coordinates, turns each into a ridge
// value 1 - |2w - 1|, averages them, raises the average to the 8th power (three
// squarings) and scales by PI_volScatterParamsPS.w / .y, CausticParams.y and
// VolumetricScatteringColor.
pixout VolumetricScatteringPS(vtxOutVolumetricScattering IN)
{
  pixout OUT;
  
  // Explicit .xyz replaces the implicit float4 -> float3 truncation of the original.
  float fVolume = 1 - abs(tex3D(volumeMapSampler, IN.vPosition0.xyz ).w*2-1);
  fVolume += 1 - abs(tex3D(volumeMapSampler, IN.vPosition1.xyz).w*2-1);
  fVolume *=0.5;
    
  // fVolume^8
  fVolume *= fVolume;
  fVolume *= fVolume;
  fVolume *= fVolume;
  
  OUT.Color = fVolume * PI_volScatterParamsPS.w  * PI_volScatterParamsPS.y * CausticParams.y * VolumetricScatteringColor;

  return OUT;
}

// Pixel shader: adds the quarter-size buffer (screenMapScaledSampler_d4) on top of the
// full-size screen (screenMapSampler) at the same texcoord.
pixout VolumetricScatteringFinalPS(vtxOut IN)
{
  pixout OUT;

  half4 cBackground = tex2D(screenMapSampler, IN.baseTC.xy);
  half4 cScattering = tex2D(screenMapScaledSampler_d4, IN.baseTC.xy);

  OUT.Color = cBackground + cScattering;

  return OUT;
}

////////////////// technique /////////////////////

// Technique VolumetricScattering: a single pass (p0) that pairs VolumetricScatteringVS
// with VolumetricScatteringPS; CullMode is None.
technique VolumetricScattering
{
  pass p0
  {
    VertexShader = CompileVS VolumetricScatteringVS();
    PixelShader = CompilePS VolumetricScatteringPS();    
    CullMode = None;        
  }
}

// Technique VolumetricScatteringFinal: a single pass (p0) that pairs BaseVS with
// VolumetricScatteringFinalPS; CullMode is None.
technique VolumetricScatteringFinal
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS VolumetricScatteringFinalPS();    
    CullMode = None;        
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Distant rain technique /////////////////////////////////////////////////////////////////////////

/// Constants ////////////////////////////

// Distant-rain constants.
float4x4 mComposite  : PI_Composite; // View*Projection
// NOTE(review): mUnproject is declared here but not read by DistantRainVS/PS below.
float4x4 mUnproject  : PB_UnProjMatrix; // invert( View * projection )
// Colour multiplied into the final rain value in DistantRainPS.
float4 cRainColor;
// VS: z * w selects the slice depth, x scales the animated Z offset of the noise coords.
float4 PI_RainParamsVS;
// PS: (1 - w) and y are intensity factors in DistantRainPS.
float4 PI_RainParamsPS;

// Vertex-to-pixel interpolants produced by DistantRainVS.
// NOTE(review): despite the "w unused" remark, DistantRainVS stores the slice Z in
// vPosition.w and DistantRainPS reads it.
struct vtxOutDistantRain
{
  float4 HPosition  : POSITION; 
  float4 vPosition : TEXCOORDN;  // w unused   
  float4 vPosition2 : TEXCOORDN;  // w unused   
  float4 tcProj     : TEXCOORDN;  // projective screen texcoord from HPosToScreenTC
};

///////////////// vertex shader //////////////////

// Vertex shader for one distant-rain slice.
//  - Maps the quad vertex from [0,1] to [-1,1], enlarges it by 1.2 and places it at
//    Z = 0.005 + 0.99 + 0.0025 * saturate(PI_RainParamsVS.z * PI_RainParamsVS.w).
//  - Output position is that vertex multiplied by mComposite; tcProj is the matching
//    projective screen texcoord from HPosToScreenTC.
//  - vPosition / vPosition2 are the slice position with an animated Z offset (factors
//    100 and 500) used as 3D noise coordinates; vPosition.w carries the slice Z.
// The original also built a sun-aligned light-view basis and a "uv" value whose results
// were never used; that dead code has been removed.
vtxOutDistantRain DistantRainVS(vtxIn IN)
{
  vtxOutDistantRain OUT = (vtxOutDistantRain)0; 

  // Position in screen space.
  float4 vPos = IN.Position;
  vPos.xy = (vPos.xy *2 - 1);
  
  vPos.xy *= 1.2; // hack: make sure to cover entire screen
  
  // Increase each slice distance
  vPos.z = 0.005+0.99+ 0.0025 *  saturate((PI_RainParamsVS.z * PI_RainParamsVS.w ));
 
  OUT.HPosition = mul(mComposite, vPos);  

  OUT.tcProj = HPosToScreenTC( OUT.HPosition );

  // Explicit .xyz replaces the implicit float4 -> float3 truncation of the original.
  OUT.vPosition.xyz = vPos.xyz + PI_RainParamsVS.x * float3(0, 0, 100*g_VS_AnimGenParams.x* ((PI_RainParamsVS.w*0.5+0.5)));
  OUT.vPosition2.xyz = vPos.xyz + PI_RainParamsVS.x * float3(0, 0, 500*g_VS_AnimGenParams.x* ((PI_RainParamsVS.w*0.5+0.5)));

  // vPosition.w carries the slice Z (the original first wrote 1 and then overwrote it).
  OUT.vPosition.w = vPos.z;
  OUT.vPosition2.w = 1;

  return OUT;
}

///////////////// pixel shader //////////////////

// Pixel shader for one distant-rain slice.
//  1. Combines three lookups into the 3D noise (.w channel) into a rain intensity.
//  2. Averages five depth-map taps (centre plus the four texToTexParams offsets) and
//     compares the result with the slice depth (tcProj.w) for a soft intersection.
//  3. Adds a "hit" term near surfaces / the water level, masked by the early noise value.
//  4. Scales everything by the soft-intersection terms, PI_RainParamsPS and cRainColor.
pixout DistantRainPS(vtxOutDistantRain IN)
{
  pixout OUT;

  //////////////////////////////////////////////////////////////////////////////////////////////////
  // Simulate distant rain with 3 noisy layers

  OUT.Color = saturate(tex3D( volumeMapSampler, IN.vPosition.xyz*0.45*float3(1,1,0.05)*0.1).w)*0.025*4;
  OUT.Color += saturate(tex3D( volumeMapSampler, IN.vPosition.xyz*0.3*float3(1.1,2.09,0.34)*0.1).w*2-0.8)*0.05;

  // Keep the value of the first two layers - it varies the look of the hits later on
  half fHitMask = OUT.Color.x;  

  OUT.Color *= 0.5;
  OUT.Color *= saturate(tex3D( volumeMapSampler, IN.vPosition2.xyz*0.245*float3(1,1,0.1)*0.0005).w*0.5+0.5);

  //////////////////////////////////////////////////////////////////////////////////////////////////
  // Compute soft-intersection coefficients with surfaces and water plane

  // Perspective-divided screen texcoord shared by all five depth taps.
  float2 vScreenTC = IN.tcProj.xy / IN.tcProj.w;

  half fSceneDepth = GetDepthMap(depthMapSampler, vScreenTC);
  fSceneDepth += GetDepthMap(depthMapSampler, (vScreenTC) + texToTexParams0.xy);
  fSceneDepth += GetDepthMap(depthMapSampler, (vScreenTC) + texToTexParams0.zw);
  fSceneDepth += GetDepthMap(depthMapSampler, (vScreenTC) + texToTexParams1.xy);
  fSceneDepth += GetDepthMap(depthMapSampler, (vScreenTC) + texToTexParams1.zw);
  // Average of the five taps (0.2), scaled by PS_NearFarClipDist.y.
  fSceneDepth *= PS_NearFarClipDist.y *0.2f;

  float fRainDepth = IN.tcProj.w; 	

  half softIntersect = saturate( 0.25* ( fSceneDepth - fRainDepth ));
  // NOTE(review): vPosition.w holds the slice Z written by DistantRainVS; it is compared
  // directly against g_fWaterLevel here - confirm both are in the same space.
  float fWaterSoftIsec = saturate(0.25 * (IN.vPosition.w - g_fWaterLevel));

  //////////////////////////////////////////////////////////////////////////////////////////////////
  // Simulate surface hits/splashes

  // Ground and water plane intersection terms, summed.
  half fHits = (1-saturate( 0.05* ( fSceneDepth - fRainDepth) ))*0.5;
  half fWaterHit = 1-saturate( 0.5* (IN.vPosition.w - g_fWaterLevel));
  fHits += fWaterHit;  

  // Apply hit mask to simulate water splashes
  fHitMask = saturate(saturate(fHitMask)*4-0.2);    
  fHits *= fHitMask;

  OUT.Color += fHits;

  // Apply soft-intersection with surfaces and water plane
  OUT.Color *=  (1-PI_RainParamsPS.w) *0.5 *softIntersect*fWaterSoftIsec * PI_RainParamsPS.y * cRainColor;

  return OUT;
}

// Pixel shader: adds the half-size buffer (screenMapScaledSampler_d2) on top of the
// full-size screen (screenMapSampler) at the same texcoord.
pixout DistantRainFinalPS(vtxOut IN)
{
  pixout OUT;

  half4 cBackground = tex2D(screenMapSampler, IN.baseTC.xy);
  half4 cRainLayer = tex2D(screenMapScaledSampler_d2, IN.baseTC.xy);

  OUT.Color = cBackground + cRainLayer;

  return OUT;
}

////////////////// technique /////////////////////

// Technique DistantRain: a single pass (p0) that pairs DistantRainVS with
// DistantRainPS; CullMode is None.
technique DistantRain
{
  pass p0
  {
    VertexShader = CompileVS DistantRainVS();
    PixelShader = CompilePS DistantRainPS();    
    CullMode = None;        
  }
}

// Technique DistantRainFinal: a single pass (p0) that pairs BaseVS with
// DistantRainFinalPS; CullMode is None.
technique DistantRainFinal
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS DistantRainFinalPS();    
    CullMode = None;        
  }
}


////////////////////////////////////////////////////////////////////////////////////////////////////
/// Water puddles texgen technique //////////////////////////////////////////////////////////////////////////

/// Specific data ////////////////////////

/// Constants ////////////////////////////

// Water puddles parameters.
//  waterPuddlesPS:        xy = drop position (remapped with *2-1), w = strength of the new drop.
//  waterPuddlesDisplayPS: w is used as the divisor for the one-texel neighbour offsets.
// NOTE(review): w is read with two different meanings - presumably the engine sets it
// per pass; confirm.
float4 waterPuddlesParams;

///////////////// vertex shader //////////////////

// Vertex-to-pixel interpolants produced by waterPuddlesVS.
struct vtxOutWaterPuddles
{
  float4 HPosition  : POSITION;
  float2 baseTC    : TEXCOORDN;
  float4 noiseTC    : TEXCOORDN;  // never written by waterPuddlesVS, so it stays zero
};

// Vertex shader for the water puddles simulation pass: transforms the vertex by
// vpMatrix and forwards the texcoord. All other outputs stay zero.
vtxOutWaterPuddles waterPuddlesVS(vtxIn IN)
{
  vtxOutWaterPuddles OUT = (vtxOutWaterPuddles)0; 

  OUT.HPosition = mul(vpMatrix, IN.Position);  
  OUT.baseTC.xy = IN.baseTC.xy;

  return OUT;
}

// Exponential falloff: returns exp(-d2 / radius).
//  d2     - distance term (1 at d2 == 0, decaying toward 0 as d2 grows)
//  radius - falloff scale; must be non-zero
float gaussian(float d2, float radius)
{
  float fExponent = -d2 / radius;
  return exp(fExponent);
}

///////////////// pixel shader //////////////////
// Pixel shader: one step of the water puddles height simulation.
//  _tex0 is read as the current height (.x), _tex1 as the previous height (.x).
//  new = ((sum of 4 neighbours) * fA + fB * current - previous) * fDamping
//  with fA = fSpeedFactor and fB = 2 - 4 * fSpeedFactor; neighbours are one texel away
//  assuming a 256-texel map (fPixSize = 1/256).
//  A new drop, shaped by gaussian() around waterPuddlesParams.xy and scaled by
//  waterPuddlesParams.w, is then added to all four output channels.
pixout waterPuddlesPS(vtxOutWaterPuddles IN)
{
  pixout OUT;
  
  float2 vDropPos = (waterPuddlesParams.xy*2-1);

  // Neighbour directions: +x, -x, +y, -y.
  float2 offsets[4] = 
  {
     1,  0,
     -1, 0,         
     0,  1,
     0, -1,
  };

  // Only the .x channel of each sample is used, so plain scalars are sufficient.
  float c = tex2D(_tex0,  IN.baseTC).x;
  float d = tex2D(_tex1,  IN.baseTC).x;
  float fDilateRatio = 1.0;
  float fSpeedFactor = 0.3333;
  float2 fPixSize = 1.0 / 256.0;
  float fDamping = 0.9985;

  float l = tex2D(_tex0,  IN.baseTC + 1*fDilateRatio*fPixSize * offsets[0]).x;
  float r = tex2D(_tex0,  IN.baseTC + 1*fDilateRatio*fPixSize * offsets[1]).x;
  float t = tex2D(_tex0,  IN.baseTC + 1*fDilateRatio*fPixSize * offsets[2]).x;
  float b = tex2D(_tex0,  IN.baseTC + 1*fDilateRatio*fPixSize * offsets[3]).x; 

  float fA = fSpeedFactor;
  float fB = 2.0 - 4.0 * fSpeedFactor;
  
  float sum = (r + l + t + b) * fA + fB * c - d;
             
  // Alpha ends up as fDamping because the 1 in .w is scaled too.
  OUT.Color = ( float4(sum.xxx, 1) *fDamping);  
   
  // NOTE(review): gaussian()'s first parameter is named d2, but a plain length (not a
  // squared one) is passed here - kept as in the original.
  OUT.Color +=  waterPuddlesParams.w * gaussian( length(abs( frac(IN.baseTC.xy-vDropPos)*2-1 ) ) ,2.0/256.0 );
  
  return OUT;
}

// Pixel shader: converts the puddle height map (_tex0, .x channel) into a normal map.
// Heights at the pixel and at its +x / +y neighbours (offset 1 / waterPuddlesParams.w)
// are remapped to [-1,1]; the differences form the normal's xy with z = 1.
// Output: rgb = normalized normal * 0.5 + 0.5, a = centre height * 0.5 + 0.5.
pixout waterPuddlesDisplayPS(vtxOut IN)
{
  pixout OUT;

  // Centre, +x neighbour and +y neighbour heights.
  float fCentre = tex2D( _tex0, IN.baseTC.xy ).x;
  float fRight  = tex2D( _tex0, IN.baseTC.xy + float2(1,0)/waterPuddlesParams.w).x;
  float fUp     = tex2D( _tex0, IN.baseTC.xy + float2(0,1)/waterPuddlesParams.w).x;

  // make it a bit sharper (maybe add a sharpening control)
  float3 vHeights = float3(fCentre, fRight, fUp);
  vHeights = ( vHeights *2 - 1 );

  float3 vNormal = normalize( float3( vHeights.x - vHeights.y, vHeights.x - vHeights.z, 1) );

  OUT.Color.xyz = vNormal*0.5+0.5;
  OUT.Color.w = vHeights.x*0.5+0.5;

  return OUT;
}

////////////////// technique /////////////////////

// Technique WaterPuddlesGen: a single pass (p0) that pairs waterPuddlesVS with
// waterPuddlesPS; CullMode is None.
technique WaterPuddlesGen
{
  pass p0
  {
    VertexShader = CompileVS waterPuddlesVS();
    PixelShader = CompilePS waterPuddlesPS();    
    CullMode = None;    
  }
}

// Technique WaterPuddlesDisplay: a single pass (p0) that pairs BaseVS with
// waterPuddlesDisplayPS; CullMode is None.
technique WaterPuddlesDisplay
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();
    PixelShader = CompilePS waterPuddlesDisplayPS();    
    CullMode = None;    
  }
}

#if %DYN_BRANCHING_POSTPROCESS

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Fillrate Profile technique /////////////////////////////////////////////////////////////////////////

// Pixel shader used for fillrate profiling: samples _tex0 32 times at the same
// texcoord, adds a per-iteration bias to each sample, and outputs the average.
// NOTE(review): the bias is added to the sampled colour, not to the texcoord -
// possibly a misplaced parenthesis in the original; behaviour kept as is.
pixout FilrateProfilePS(vtxOut IN)
{
  pixout OUT;

  const int nTaps = 32;
  float fInvTaps = 1.0 / (float) nTaps ;

  half4 cSum = 0;
#if D3D10
  [unroll]
#endif
  for(int k = 0; k < nTaps; k++)
  {
    // Bias cycles through [-4, 4) as k advances.
    float fBias = (frac(k * fInvTaps*10)*2-1)*4;
    cSum += tex2D(_tex0, IN.baseTC.xy) + fBias;
  }

  OUT.Color = cSum * fInvTaps;

  return OUT;
}

////////////////// technique /////////////////////

// Technique FillrateProfile: a single pass (p0) that pairs BaseVS with
// FilrateProfilePS (spelling as in the shader's definition); CullMode is None.
technique FillrateProfile
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();            
    PixelShader = CompilePS FilrateProfilePS();
    CullMode = None;        
  }
}

#endif

////////////////////////////////////////////////////////////////////////////////////////////////////
/// Grain filter technique /////////////////////////////////////////////////////////////////////////

// Noise texture read by GrainFilterPS. Point filtering on min/mag/mip so texels are
// not blended, wrapping in U and V so it tiles across the screen.
sampler2D grainNoiseSampler = sampler_state
{
  Texture = textures/defaults/vector_noise.dds;
  MinFilter = POINT;  
  MagFilter = POINT;
  MipFilter = POINT; 
  AddressU = Wrap;
  AddressV = Wrap;
};


// Pixel shader: adds noise grain to the screen.
//  - The noise texture is tiled every 64 screen pixels and shifted by
//    psParams[0].xy / PS_ScreenSize.xy.
//  - A large texcoord-dependent term is added to the noise and the fractional part is
//    kept, then remapped to [-1,1].
//  - The mean of the two noise components, scaled by psParams[0].w, is added to all
//    channels of the screen colour.
pixout GrainFilterPS(vtxOut IN)
{
  pixout OUT;

  float2 vNoiseTC = (IN.baseTC.xy ) * (PS_ScreenSize.xy/64.0) +  (psParams[0].xy/PS_ScreenSize.xy);
  // Explicit .xy replaces the implicit float4 -> float2 truncation of the original.
  float2 vNoise = tex2D(grainNoiseSampler, vNoiseTC).xy + dot(IN.baseTC.xy, 1) * 65535;
  vNoise = frac( vNoise );

  vNoise = vNoise*2-1;

  half4 cScreen = tex2D(screenMapSampler, IN.baseTC.xy);

  // dot with 0.5 averages the two noise components.
  OUT.Color = cScreen + dot(vNoise.xy, 0.5)*psParams[0].w;

  return OUT;
}

////////////////// technique /////////////////////

// Technique GrainFilter: a single pass (p0) that pairs BaseVS with GrainFilterPS;
// CullMode is None.
technique GrainFilter
{
  pass p0
  {
    VertexShader = CompileVS BaseVS();            
    PixelShader = CompilePS GrainFilterPS();
    CullMode = None;        
  }
}

*//**//**//**//**//**//**//**//**//**//**//**//**//**/